####################
## Author: Carlos Gould
## Project: Using high frequency household surveys to describe energy use in rural North India during the COVID-19 pandemic 
## Task: Analysis of data to produce tables and figures for manuscript
####################

# 0. Libraries ----

library(tidyverse)
library(readxl)
library(openxlsx)
library(lubridate)
library(haven)
library(arsenal)
library(cowplot)
library(ggstatsplot)
library(ggsci)
library(broom)
library(MetBrewer)
library(fixest)
library(ggalluvial)
library(UpSetR)

mycontrols  <- tableby.control(test=TRUE, total=FALSE,
                               numeric.stats=c("N", "meansd", "medianq1q3", "range", "Nmiss"),
                               cat.stats=c("countpct", "Nmiss"),
                               stats.labels=list(N='Count', meansd='Mean (SD)', medianq1q3='Median (IQR)'),
                               digits = 2L,
                               digits.pct = 0L)

# 1. Data -------
# lpg prices
lpg_prices <- readxl::read_xlsx("~/Downloads/india_covid_lpgrefill_prices.xlsx")

# covid data 
covid <- read_dta("~/Downloads/covid_infected_deaths.dta")
covid_states <- read_csv("~/Downloads/states.csv")

# india shape file
shape_adm1 <- sf::st_read("~/Downloads/india_states_shp/india_states.shp")
shape_adm2 <- sf::st_read("~/Downloads/india_states_shp_2/IND_adm2.shp")

# Bihar
bbaseline <- readxl::read_xlsx("~/BurkeLab Dropbox/Carlos Gould/Bihar LPG Cooking Project/Baseline/Shared Data/Cooking Air Pollution_Bihar LPG Shared Data_De-identified_15th Feb.xlsx", skip = 1)
bsurveys <- read_rds("~/BurkeLab Dropbox/Carlos Gould/Bihar2020_AirPollution/Code/surveys.rds")

# Jharkhand
jsurveys <- read_csv("~/BurkeLab Dropbox/Carlos Gould/Jharkhand-COVID19/Data/jcovid_full_b_1.csv")
jbaseline <- read_dta("~/BurkeLab Dropbox/Carlos Gould/Jharkhand-COVID19/Data/jharkhand_access_data.dta") %>% 
  rename_all(funs(str_replace(.,"^", "B_"))) %>%
  rename(hhid = B_UniqueId) %>% 
  rename(B_q_14_relation_to_HH = B_q_14,
         B_q_16_age = B_q_16,
         B_q_17_gender = B_q_17,
         B_q_19_literate = B_q_19,
         B_q_19_speakHindi = B_q_19_1,
         B_q_19_writeHindi = B_q_19_2,
         
         B_q_20_HH_education = B_q_20,
         B_q_21_religion = B_q_21,
         B_q_22_caste = B_q_22,
         B_q_23_adultmen = B_q_23,
         B_q_24_adultwomen = B_q_24,
         B_q_25_children = B_q_25,
         B_q_26_children_studying = B_q_26,
         B_q_27_voterID = B_q_27,
         B_q_28_rationcard = B_q_28,
         
         B_q_29_primary_incomesource = B_q_29,
         B_q_31_monthly_expenditures = B_q_31,
         B_q_32_rupees_saved = B_q_32,
         B_q_33_woman_anyincome = B_q_33,
         
         B_q_34_land = B_q_34_1,
         B_q_34_land_unit = B_q_34_2,
         B_q_35_cows = B_q_35_1,
         B_q_35_buffaloes = B_q_35_2,
         B_q_35_chicken = B_q_35_3,
         B_q_35_cow_calves = B_q_35_4,
         B_q_35_buffalo_calves = B_q_35_5,
         B_q_35_goats = B_q_35_6,
         B_q_35_otheranimal = B_q_35_7,
         
         B_q_37_decisionmaker = B_q_37,
         B_q_38_pucca = B_q_38,
         B_q_39_toilet = B_q_39,
         B_q_40_pipedwater = B_q_40,
         B_q_41_rooms = B_q_41,
         B_q_42_beds = B_q_42,
         B_q_43_tables = B_q_43,
         B_q_44_chairs = B_q_44,
         B_q_45_bicycles = B_q_45,
         B_q_46_motorcycles = B_q_46,
         B_q_47_pressurecookers = B_q_47,
         B_q_48_34wheel_ag_equipment = B_q_48,
         B_q_49_fishingboat = B_q_49,
         B_q_50_govtemployee = B_q_50,
         B_q_51_taxpayer = B_q_51,
         
         B_q_57_primary_lighting = B_q_57,
         B_q_58_primary_lighting_meetsneeds = B_q_58_1,
         B_q_58_primary_lighting_isexpensive = B_q_58_2,
         B_q_58_primary_lighting_issafe = B_q_58_3,
         B_q_58_primary_lighting_isreliable = B_q_58_4,
         B_q_59_primary_lighting_satisfaction = B_q_59,
         
         B_q_60_grid_electricity = B_q_60,
         B_q_60_grid_electricity_years = B_q_60_1,
         B_q_60_grid_electricity_saubhagya = B_q_60_2,
         B_q_60_grid_electricity_cnxn_fee = B_q_60_3,
         B_q_60_grid_electricity_fixed_variable_fee = B_q_60_4,
         B_q_60_grid_electricity_monthly_fee = B_q_60_5_1,
         B_q_60_grid_electricity_monthly_repairs = B_q_60_5_2,
         B_q_60_grid_electricity_monthly_informalpayment = B_q_60_5_3,
         B_q_60_grid_electricity_monthly_paynongrid = B_q_60_5_4,
         
         B_q_61_microgrid_electricity = B_q_61,
         B_q_61_microgrid_electricity_source = B_q_61_1,
         B_q_61_microgrid_electricity_monthly_fee = B_q_61_2,
         
         B_q_62_solar_system = B_q_62,
         B_q_62_solar_system_hours = B_q_62_1,
         B_q_62_solar_system_lights = B_q_62_2,
         
         B_q_63_solar_lantern = B_q_63,
         B_q_63_solar_lantern_lights = B_q_63_1,
         B_q_63_solar_lantern_hours = B_q_63_2,
         
         B_q_64_kerosene_lighting = B_q_64,
         B_q_64_kerosene_wicklamp_number = B_q_64_1,
         B_q_64_kerosene_lantern_number = B_q_64_2,
         B_q_64_kerosene_lighting_hours = B_q_64_3,
         B_q_64_kerosene_lighting_PDSliters_month = B_q_64_4,
         B_q_64_kerosene_lighting_PDSliters_price = B_q_64_5,
         B_q_64_kerosene_lighting_marketliters_month = B_q_64_6,
         B_q_64_kerosene_lighting_marketliters_price = B_q_64_7,
         B_q_64_kerosene_lighting_health_effect = B_q_64_8,
         
         B_q_72_mobile_phone = B_q_72,
         
         B_q_73_electrification = B_q_73,
         B_q_74_electrification_hrs_available = B_q_74,
         
         B_q_80_electrification_satisfied = B_q_80,
         B_q_80_electrification_unsatisfied_expensive = B_q_80_1_1,
         B_q_80_electrification_unsatisfied_unavailable = B_q_80_1_2,
         B_q_80_electrification_unsatisfied_poorquality = B_q_80_1_3,
         B_q_80_electrification_unsatisfied_poormaintenance = B_q_80_1_4,
         
         B_q_81_grid_electricity_available = B_q_81, 
         B_q_81_grid_electricity_whynot_expensive = B_q_81_1, 
         B_q_81_grid_electricity_whynot_unavailable = B_q_81_2, 
         B_q_81_grid_electricity_whynot_unreliable = B_q_81_3, 
         B_q_81_grid_electricity_whynot_dk_how = B_q_81_4, 
         B_q_82_ever_grid_electricity = B_q_82,
         
         B_q_84_primary_cookfuel = B_q_84,
         B_q_85_cook_meals_daily = B_q_85,
         B_q_86_cook_hrs_daily = B_q_86,
         
         B_q_87_has_lpg = B_q_87,
         B_q_87_lpg_years = B_q_87_1,
         B_q_87_lpg_pmuy = B_q_87_2,
         B_q_87_lpg_connection = B_q_87_3,
         
         B_q_87_lpg_roti = B_q_87_4_1,
         B_q_87_lpg_veg_lentil = B_q_87_4_2,
         B_q_87_lpg_rice = B_q_87_4_3,
         B_q_87_lpg_teasnacks = B_q_87_4_4,
         B_q_87_lpg_milkboil = B_q_87_4_5,
         B_q_87_lpg_waterboil = B_q_87_4_6,
         B_q_87_lpg_nonveg = B_q_87_4_7,
         
         B_q_87_lpg_allneeds = B_q_87_5,
         B_q_87_lpg_allneeds_no_expensive = B_q_87_5_1_1,
         B_q_87_lpg_allneeds_no_freebiomassavail = B_q_87_5_1_2,
         B_q_87_lpg_allneeds_no_preferchulhasome= B_q_87_5_1_3,
         B_q_87_lpg_allneeds_no_dontlikelpgfood = B_q_87_5_1_4,
         B_q_87_lpg_allneeds_no_lpgavailabilityconstraint = B_q_87_5_1_5,
         
         B_q_87_large_lpg_cyls = B_q_87_7,
         B_q_87_large_lpg_cyls_cost_dist = B_q_87_12,
         B_q_87_large_lpg_cyls_cost_market = B_q_87_14,
         B_q_87_small_lpg_cyls = B_q_87_9,
         B_q_87_small_lpg_cyls_cost_dist = B_q_87_13,
         B_q_87_small_lpg_cyls_cost_market = B_q_87_15,
         B_q_87_lpg_doorstep_deliv = B_q_87_16,
         B_q_87_lpg_oneway = B_q_87_16_1,
         
         B_q_87_lpg_satisfaction = B_q_87_17,
         B_q_87_lpg_unsatisfied_expensive = B_q_87_17_1,
         B_q_87_lpg_unsatisfied_refillpooravail = B_q_87_17_1,
         B_q_87_lpg_unsatisfied_refilltoofar = B_q_87_17_1,
         B_q_87_lpg_unsatisfied_poormaintenance = B_q_87_17_1,
         
         B_q_87_lpg_refill_order_receive_gap = B_q_87_18,
         B_q_87_lpg_refill_subsidy_who_acct = B_q_87_19,
         B_q_87_lpg_refill_who_orders = B_q_87_20,
         B_q_87_lpg_refill_who_gets = B_q_87_21,
         
         B_q_88_no_lpg_why_not_avail_too_far = B_q_88_1,
         B_q_88_no_lpg_why_cnxn_too_expensive = B_q_88_2,
         B_q_88_no_lpg_why_lpgrefill_too_expensive = B_q_88_3,
         B_q_88_no_lpg_why_dk_how = B_q_88_4,
         
         B_q_89_no_lpg_wants = B_q_89,
         B_q_89_wants_lpg_wtp_cnxn = B_q_89_1,
         B_q_89_wants_lpg_wtp_refills = B_q_89_2,
         
         B_q_90_use_firewood_chips = B_q_90,
         B_q_90_collect_firewood_forest = B_q_90_1_1,
         B_q_90_collect_firewood_roadside = B_q_90_1_2,
         B_q_90_collect_firewood_farm = B_q_90_1_3,
         
         B_q_90_firewood_kg_week = B_q_90_2,
         B_q_90_firewood_kg_week_collected = B_q_90_3,
         B_q_90_firewood_kg_week_bought_market = B_q_90_4,
         B_q_90_firewood_kg_cost = B_q_90_5,
         B_q_90_firewood_collect_freq = B_q_90_6,
         B_q_90_firewood_collect_hrs_trip = B_q_90_6_1,
         B_q_90_firewood_collect_km_trip = B_q_90_6_2_1,
         B_q_90_firewood_buy_km_trip = B_q_90_6_2_2,
         
         B_q_91_use_dung = B_q_91,
         
         B_q_92_use_ag_residue = B_q_92,
         B_q_93_use_coal = B_q_93,
         
         B_q_96_kitchen = B_q_96,
         B_q_97_primary_cookstove_satisfaction = B_q_97,
  ) 


jsurveys_1 <- jsurveys %>% 
  mutate(B_primary_lpg = ifelse(B_q_84_primary_cookfuel==5, 1, 0),
         B_secondary_lpg = ifelse(B_q_84_primary_cookfuel!=5 &
                                    B_q_87_has_lpg==1, 1, 0),
         B_primary_polluting = ifelse(B_q_84_primary_cookfuel!=5, 1, 0),
         B_secondary_polluting = ifelse(((B_q_84_primary_cookfuel==5) & 
                                           (B_q_90_use_firewood_chips==1 |
                                              B_q_91_use_dung==1 |
                                              B_q_92_use_ag_residue==1 |
                                              B_q_93_use_coal==1)), 1, 0), 
         
         W_primary_lpg = ifelse(d_2_primary_cook_fuel==9, 1, 0),
         W_primary_polluting = ifelse(((d_2_primary_cook_fuel>=1 & 
                                          d_2_primary_cook_fuel<=6) |
                                         d_2_primary_cook_fuel==8), 1, 0),
         W_secondary_lpg = ifelse(d_2_primary_cook_fuel!=9 &
                                    d_1_9_any_cook_fuel_lpg==1, 1, 0),
         W_secondary_polluting = ifelse(((d_2_primary_cook_fuel>=9 |
                                            d_2_primary_cook_fuel==7) & 
                                           (d_1_1_firewood==1 |
                                              d_1_2_any_cook_fuel_agresidue==1 |
                                              d_1_3_any_cook_fuel_dung==1 |
                                              d_1_4_any_cook_fuel_charcoal==1 |
                                              d_1_5_any_cook_fuel_coal==1 |
                                              d_1_6_any_cook_fuel_kerosene==1 |
                                              d_1_8_any_cook_fuel_gasoline==1)), 1, 0),
         
         W_kerosene_lighting = ifelse(d_0_1_primary_lighting_fourdays==2 |
                                        d_0_all_lighting_kerosene==1, 1, 0)) %>% 
  mutate(B_no_polluting = ifelse(B_q_90_use_firewood_chips==0 &
                                   B_q_91_use_dung==0 &
                                   B_q_92_use_ag_residue==0 &
                                   B_q_93_use_coal==0, 1, 0), 
         B_no_lpg = ifelse(B_q_87_has_lpg==0, 1, 0), 
         
         W_no_lpg = ifelse(W_primary_lpg==0 & W_secondary_lpg==0, 1, 0),
         W_no_polluting = ifelse(W_primary_polluting==0 & W_secondary_polluting==0, 1, 0)) %>% 
  mutate(W_cookfuel_stack_category = ifelse(W_primary_polluting==1 & W_no_lpg==1, "Exclusive polluting", 
                                            ifelse(W_primary_polluting==1 & W_secondary_lpg==1, "Primary polluting, Secondary LPG", 
                                                   ifelse(W_primary_lpg==1 & W_secondary_polluting==1, "Primary LPG, Secondary polluting", 
                                                          ifelse(W_primary_lpg==1 & W_no_polluting==1, "Exclusive LPG", NA)))),
         B_cookfuel_stack_category = ifelse(B_primary_polluting==1 & B_no_lpg==1, "Exclusive polluting", 
                                            ifelse(B_primary_polluting==1 & B_secondary_lpg==1, "Primary polluting, Secondary LPG", 
                                                   ifelse(B_primary_lpg==1 & B_secondary_polluting==1, "Primary LPG, Secondary polluting", 
                                                          ifelse(B_primary_lpg==1 & B_no_polluting==1, "Exclusive LPG", NA)))),
  ) %>% 
  # Any COVID-related economic impacts? 
  mutate(
    weekly_econ_hardship_any = ifelse(c_1_1_econ_hardship_difficulty_food=="1" | 
                                        c_1_2_econ_hardship_increase_prices=="1" | 
                                        c_1_3_econ_hardship_reduced_income=="1" | 
                                        c_1_4_econ_hardship_salary_employment_loss=="1" | 
                                        c_1_5_econ_hardship_hourly_employment_loss=="1" | 
                                        c_1_6_econ_hardship_hourly_employment_reduced=="1" | 
                                        c_1_7_econ_hardship_other_cash_sources_reduced=="1" | 
                                        c_1_8_econ_hardship_unable_tolook_employment=="1", 1, 0),
    weekly_econ_hardship_job_money = ifelse(c_1_3_econ_hardship_reduced_income=="1" | 
                                              c_1_4_econ_hardship_salary_employment_loss=="1" | 
                                              c_1_5_econ_hardship_hourly_employment_loss=="1" | 
                                              c_1_6_econ_hardship_hourly_employment_reduced=="1" | 
                                              c_1_7_econ_hardship_other_cash_sources_reduced=="1" | 
                                              c_1_8_econ_hardship_unable_tolook_employment=="1", 1, 0),
  ) %>% 
  # Any use of three free cylinders?
  arrange(hhid, round) %>% 
  group_by(hhid) %>% 
  mutate(
    free_cylinder_total = h_4_1_obtained_free_cylinder_number,
    free_cylinder_total_lag1 = lag(h_4_1_obtained_free_cylinder_number, 1),
    free_cylinder_total_lag2 = lag(h_4_1_obtained_free_cylinder_number, 2),
    free_cylinder_total_lag3 = lag(h_4_1_obtained_free_cylinder_number, 3),
    free_cylinder_total_lag4 = lag(h_4_1_obtained_free_cylinder_number, 4),
    free_cylinder_total_lag5 = lag(h_4_1_obtained_free_cylinder_number, 5)
  ) %>% 
  ungroup() %>% 
  mutate(
    free_cylinder_change_lag1 = free_cylinder_total - free_cylinder_total_lag1,
    free_cylinder_change_lag2 = free_cylinder_total - free_cylinder_total_lag2,
    free_cylinder_change_lag3 = free_cylinder_total - free_cylinder_total_lag3,
    free_cylinder_change_lag4 = free_cylinder_total - free_cylinder_total_lag4,
    free_cylinder_change_lag5 = free_cylinder_total - free_cylinder_total_lag5) %>% 
  mutate(free_cylinder_change = ifelse(!is.na(free_cylinder_change_lag1), free_cylinder_change_lag1,
                                       ifelse(!is.na(free_cylinder_change_lag2), free_cylinder_change_lag2,
                                              ifelse(!is.na(free_cylinder_change_lag3), free_cylinder_change_lag3,
                                                     ifelse(!is.na(free_cylinder_change_lag4), free_cylinder_change_lag4,
                                                            ifelse(!is.na(free_cylinder_change_lag5), free_cylinder_change_lag5, NA)))))) %>% 
  group_by(hhid, b_1_relation_to_hhead) %>% 
  mutate(free_cylinder_aware = h_1_threecylinder_aware,
         free_cylinder_aware_lag1 = lag(h_1_threecylinder_aware, 1),
         free_cylinder_aware_lag2 = lag(h_1_threecylinder_aware, 2),
         free_cylinder_aware_lag3 = lag(h_1_threecylinder_aware, 3),
         free_cylinder_aware_lag4 = lag(h_1_threecylinder_aware, 4),
         free_cylinder_aware_lag5 = lag(h_1_threecylinder_aware, 5)) %>% 
  ungroup() %>% 
  mutate(
    free_cylinder_aware_change_lag1 = free_cylinder_aware - free_cylinder_aware_lag1,
    free_cylinder_aware_change_lag2 = free_cylinder_aware - free_cylinder_aware_lag2,
    free_cylinder_aware_change_lag3 = free_cylinder_aware - free_cylinder_aware_lag3,
    free_cylinder_aware_change_lag4 = free_cylinder_aware - free_cylinder_aware_lag4,
    free_cylinder_aware_change_lag5 = free_cylinder_aware - free_cylinder_aware_lag5
  ) %>% 
  mutate(free_cylinder_aware_change = ifelse(!is.na(free_cylinder_aware_change_lag1), free_cylinder_aware_change_lag1,
                                             ifelse(!is.na(free_cylinder_aware_change_lag2), free_cylinder_aware_change_lag2,
                                                    ifelse(!is.na(free_cylinder_aware_change_lag3), free_cylinder_aware_change_lag3,
                                                           ifelse(!is.na(free_cylinder_aware_change_lag4), free_cylinder_aware_change_lag4,
                                                                  ifelse(!is.na(free_cylinder_aware_change_lag5), free_cylinder_aware_change_lag5, NA))))))  %>% 
  dplyr::mutate(e_8_refill_next_wks = as.numeric(e_8_refill_next_wks),
                e_2_refill_lpg_wksago = as.numeric(e_2_refill_lpg_wksago)) %>% 
  dplyr::mutate(lpg_refill_gap = as.numeric(e_8_refill_next_wks) + as.numeric(e_2_refill_lpg_wksago)) %>% 
  mutate(round_digit = plyr::mapvalues(round,
                                       from=c("Round 1", "Round 2", "Round 3", "Round 4",
                                              "Round 5", "Round 6"),
                                       to=c("1", "2", "3", "4", "5", "6"))) %>% 
  mutate(round_digit = factor(round_digit, levels = c("1", "2", "3", "4", "5", "6"))) 




# Supplementary Figure 1 -------

jsurveys_1 %>% 
  group_by(hhid) %>% 
  summarize(n=n()) %>% 
  summarize(n1 = sum(n==1),
            n2 = sum(n==2),
            n3 = sum(n==3),
            n4 = sum(n==4),
            n5 = sum(n==5),
            n6 = sum(n==6))

jparts_round <- jsurveys_1 %>% 
  distinct(hhid, round_digit) %>% 
  mutate(surveyed = 1) %>% 
  mutate(round_digit = as.numeric(round_digit))

jparts_round_first <- jparts_round %>% 
  group_by(hhid) %>% 
  mutate(surveyed_first = min(round_digit)) %>% 
  distinct(hhid, surveyed_first)

jparts_round_full_df <- data.frame(matrix(NA, length(unique(jsurveys_1$hhid))*6, 2))
colnames(jparts_round_full_df) <- c("hhid", "round_digit")

jparts_round_full <- jparts_round_full_df %>% 
  mutate(hhid = rep(unique(jsurveys_1$hhid), 6)) %>% 
  arrange(hhid) %>% 
  mutate(round_digit = rep(c(1, 2, 3, 4, 5, 6), 
                           length(unique(jsurveys_1$hhid)))) %>% 
  left_join(jparts_round) %>% 
  mutate(surveyed = ifelse(!is.na(surveyed), 1, 0),
         round_digit = factor(round_digit, levels = c(1, 2, 3, 4, 5, 6))) %>% 
  left_join(jparts_round_first) %>% 
  group_by(hhid) %>% 
  mutate(surveyed_total = sum(surveyed)) %>% 
  ungroup()

view(jparts_round_full)

j_repeat_obs_round_fig_1 <- ggplot(jparts_round_full %>% 
                                     filter(surveyed_total==1) %>% 
                                     mutate(hhid_fct = forcats::fct_reorder(hhid, surveyed_first),
                                            surveyed = plyr::mapvalues(surveyed,
                                                                       from=c(0, 1),
                                                                       to=c("Not Surveyed", "Surveyed"))), 
       aes(x=round_digit, y=hhid_fct, 
           color=as.factor(surveyed), fill=as.factor(surveyed))) + 
  geom_tile() + 
  ggsci::scale_fill_aaas() +
  ggsci::scale_color_aaas() +
  annotate("text", x=7, y=107/2, label="One time\n(N=107)", size=3, color="black") + 
  coord_cartesian(xlim=c(0.45, 7), clip="off") + 
  theme_classic() + 
  xlab("") + ylab("") + 
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        axis.title = element_blank(),
        axis.text.y = element_blank(),
        plot.title = element_blank(),
        axis.ticks.y = element_blank(),
        plot.background = element_blank(),
        axis.line = element_blank())

j_repeat_obs_round_fig_2 <- ggplot(jparts_round_full %>% 
                                     filter(surveyed_total==2) %>% 
                                     mutate(hhid_fct = forcats::fct_reorder(hhid, surveyed_first)), 
                                   aes(x=round_digit, y=hhid_fct, 
                                       color=as.factor(surveyed), fill=as.factor(surveyed))) + 
  geom_tile() + 
  ggsci::scale_fill_aaas() +
  ggsci::scale_color_aaas() +
  annotate("text", x=7, y=104/2, label="Two times\n(N=104)", size=3, color="black") +
  coord_cartesian(xlim=c(0.45, 7), clip="off") + 
  theme_classic() + 
  xlab("") + ylab("") + 
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        plot.title = element_blank(),
        axis.ticks = element_blank(),
        plot.background = element_blank(),
        axis.line = element_blank())

j_repeat_obs_round_fig_2

j_repeat_obs_round_fig_3 <- ggplot(jparts_round_full %>% 
                                     filter(surveyed_total==3) %>% 
                                     mutate(hhid_fct = forcats::fct_reorder(hhid, surveyed_first)), 
                                   aes(x=round_digit, y=hhid_fct, 
                                       color=as.factor(surveyed), fill=as.factor(surveyed))) + 
  geom_tile() + 
  ggsci::scale_fill_aaas() +
  ggsci::scale_color_aaas() +
  annotate("text", x=7, y=45, label="Three times\n(N=90)", size=3, color="black") +
  coord_cartesian(xlim=c(0.45, 7), clip="off") + 
  theme_classic() + 
  xlab("") + ylab("") + 
  theme(legend.position = "none",
        axis.title = element_blank(),
        plot.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.background = element_blank(),
        axis.line = element_blank())

j_repeat_obs_round_fig_3

j_repeat_obs_round_fig_4 <- ggplot(jparts_round_full %>% 
                                     filter(surveyed_total==4) %>% 
                                     mutate(hhid_fct = forcats::fct_reorder(hhid, surveyed_first)), 
                                   aes(x=round_digit, y=hhid_fct, 
                                       color=as.factor(surveyed), fill=as.factor(surveyed))) + 
  geom_tile() + 
  ggsci::scale_fill_aaas() +
  ggsci::scale_color_aaas() +
  annotate("text", x=7, y=151/2, label="Four times\n(N=151)", size=3, color="black") +
  coord_cartesian(xlim=c(0.45, 7), clip="off") + 
  theme_classic() + 
  xlab("") + ylab("") + 
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_blank(),
        plot.background = element_blank(),
        axis.line = element_blank())

j_repeat_obs_round_fig_4

j_repeat_obs_round_fig_5 <- ggplot(jparts_round_full %>% 
                                     filter(surveyed_total==5) %>% 
                                     mutate(hhid_fct = forcats::fct_reorder(hhid, surveyed_first)), 
                                   aes(x=round_digit, y=hhid_fct, 
                                       color=as.factor(surveyed), fill=as.factor(surveyed))) + 
  geom_tile() + 
  ggsci::scale_fill_aaas() +
  ggsci::scale_color_aaas() +
  annotate("text", x=7, y=169/2, label="Five times\n(N=169)", size=3, color="black") +
  coord_cartesian(xlim=c(0.45, 7), clip="off") + 
  theme_classic() + 
  xlab("") + ylab("") + 
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        plot.title = element_blank(),
        axis.ticks = element_blank(),
        plot.background = element_blank(),
        axis.line = element_blank())

j_repeat_obs_round_fig_5

j_repeat_obs_round_fig_6 <- ggplot(jparts_round_full %>% 
                                     filter(surveyed_total==6) %>% 
                                     mutate(hhid_fct = forcats::fct_reorder(hhid, surveyed_first)), 
                                   aes(x=round_digit, y=hhid_fct, 
                                       color=as.factor(surveyed), fill=as.factor(surveyed))) + 
  geom_tile() + 
  ggsci::scale_fill_aaas() +
  ggsci::scale_color_aaas() +
  annotate("text", x=7, y=130.5, label="Six times\n(N=261)", size=3, color="black") +
  coord_cartesian(xlim=c(0.45, 7), clip="off") + 
  theme_classic() + 
  xlab("") + ylab("") + 
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        plot.title = element_blank(),
        axis.ticks = element_blank(),
        plot.background = element_blank(),
        axis.line = element_blank())

j_repeat_obs_round_fig_6


j_repeat_obs_round_fig <- 
  plot_grid(
    j_repeat_obs_round_fig_6,
    j_repeat_obs_round_fig_5,
    j_repeat_obs_round_fig_4, 
    j_repeat_obs_round_fig_3, 
    j_repeat_obs_round_fig_2,
    j_repeat_obs_round_fig_1,
    align="hv",
    nrow=6,
    rel_heights=c(261, 169, 151, 90, 104, 187)
  )


# cowplot::ggsave2("~/Dropbox/Jharkhand-COVID19/Code/figures/december2021/j_repeat_obs_round_fig.png",
#                  j_repeat_obs_round_fig,
#                  height = 200,
#                  width = 150,
#                  units = "mm",
#                  dpi=200)


# Estimated statistic: ever reported socio economic impacts m-------
ever_covid_hardships_df <- jsurveys_1 %>% 
  group_by(hhid) %>% 
  summarize(econ_hardship_difficulty_food = mean(c_1_1_econ_hardship_difficulty_food=="1", na.rm=T),
            econ_hardship_increase_prices = mean(c_1_2_econ_hardship_increase_prices=="1", na.rm=T),
            econ_hardship_reduced_income = mean(c_1_3_econ_hardship_reduced_income=="1", na.rm=T),
            econ_hardship_salary_employment_loss = mean(c_1_4_econ_hardship_salary_employment_loss=="1", na.rm=T),
            econ_hardship_hourly_employment_loss = mean(c_1_5_econ_hardship_hourly_employment_loss=="1", na.rm=T),
            econ_hardship_hourly_employment_reduced = mean(c_1_6_econ_hardship_hourly_employment_reduced=="1", na.rm=T),
            econ_hardship_other_cash_sources_reduced = mean(c_1_7_econ_hardship_other_cash_sources_reduced=="1", na.rm=T),
            econ_hardship_unable_tolook_employment = mean(c_1_8_econ_hardship_unable_tolook_employment=="1", na.rm=T),
            hh_size_fewer = mean(c_2_hh_size_change==1, na.rm=T),
            hh_size_same = mean(c_2_hh_size_change==2, na.rm=T),
            hh_size_more = mean(c_2_hh_size_change==3, na.rm=T)) %>% 
  # ungroup() %>% 
  mutate(econ_hardship_difficulty_food = ifelse(econ_hardship_difficulty_food>0, 1, 0),
         econ_hardship_increase_prices = ifelse(econ_hardship_increase_prices>0, 1, 0),
         econ_hardship_reduced_income = ifelse(econ_hardship_reduced_income>0, 1, 0),
         econ_hardship_salary_employment_loss = ifelse(econ_hardship_salary_employment_loss>0, 1, 0),
         econ_hardship_hourly_employment_loss = ifelse(econ_hardship_hourly_employment_loss>0, 1, 0),
         econ_hardship_hourly_employment_reduced = ifelse(econ_hardship_hourly_employment_reduced>0, 1, 0),
         econ_hardship_other_cash_sources_reduced = ifelse(econ_hardship_other_cash_sources_reduced>0, 1, 0),
         econ_hardship_unable_tolook_employment = ifelse(econ_hardship_unable_tolook_employment>0, 1, 0),
         hh_size_fewer = ifelse(hh_size_fewer>0, 1, 0),
         hh_size_same = ifelse(hh_size_same>0, 1, 0),
         hh_size_more = ifelse(hh_size_more>0, 1, 0)) %>% 
  summarize(econ_hardship_difficulty_food = mean(econ_hardship_difficulty_food, na.rm=T),
            econ_hardship_increase_prices = mean(econ_hardship_increase_prices, na.rm=T),
            econ_hardship_reduced_income = mean(econ_hardship_reduced_income, na.rm=T),
            econ_hardship_salary_employment_loss = mean(econ_hardship_salary_employment_loss, na.rm=T),
            econ_hardship_hourly_employment_loss = mean(econ_hardship_hourly_employment_loss, na.rm=T),
            econ_hardship_hourly_employment_reduced = mean(econ_hardship_hourly_employment_reduced, na.rm=T),
            econ_hardship_other_cash_sources_reduced = mean(econ_hardship_other_cash_sources_reduced, na.rm=T),
            econ_hardship_unable_tolook_employment = mean(econ_hardship_unable_tolook_employment, na.rm=T),
            hh_size_fewer = mean(hh_size_fewer, na.rm=T),
            hh_size_same = mean(hh_size_same, na.rm=T),
            hh_size_more = mean(hh_size_more, na.rm=T)) %>% 
  mutate(round = "Ever Reported") %>% 
  pivot_longer(-round,"parameter", "value")


# Figure 1 -------
sample_districts <- jsurveys_1 %>% 
  rename(districts = `District_cal_District Name`) %>% 
  distinct(districts) %>% 
  bind_rows(bbaseline %>% 
              filter(q_103 %in% bsurveys$hhid) %>% 
              mutate(districts = str_to_title(q_109)) %>% 
              distinct(districts)) %>% 
  arrange(districts)

study_states <- shape_adm1 %>% 
  filter(ST_NM=="Bihar" |
           ST_NM=="Jharkhand") 

study_districts <- shape_adm2 %>% 
  filter(NAME_1=="Bihar" |
           NAME_1=="Jharkhand") %>% 
  mutate(sampled = 
           ifelse(NAME_2 %in% sample_districts$districts, 1, 0)) %>% 
  mutate(sampled_cat = ifelse(sampled==1 & NAME_1=="Bihar", "Bihar",
                              ifelse(sampled==1 & NAME_1=="Jharkhand", "Jharkhand", "Not Sampled")))

study_map_1 <- 
  ggplot(shape_adm1) + 
  geom_sf(fill="white", color="grey40") + 
  geom_sf(data=study_states, aes(fill=sampled), fill=c("white", "#3B4992FF", "#EE0000FF")) +
  annotate("text", x=87.4, y=28.99, label="Bihar") + 
  geom_segment(aes(x=86.5, xend=85.5, y= 27.8, yend=26),
               arrow=arrow(type = "closed",angle = 20,length = unit(2, "mm")), color="black", size=0.4) + 
  
  annotate("text", x=90, y=18.05, label="Jharkhand") + 
  geom_segment(aes(x=89.6, xend=86, y= 19, yend=23),
               arrow=arrow(type = "closed",angle = 20,length = unit(2, "mm")), color="black", size=0.4) + 
  theme_map() +
  coord_sf() + 
  theme(legend.position = "none")

# study_map_1

study_map_2 <- 
  ggplot() + 
  geom_sf(data=study_districts, aes(fill=as.factor(sampled_cat)), color="grey70") +
  scale_fill_manual(values=c("#3B4992FF", "#EE0000FF", "white")) + 
  geom_sf(data=shape_adm1 %>%
            filter(ST_NM=="Bihar" |
                     ST_NM=="Jharkhand"), color="grey40", fill=NA) + 
  theme_map() +
  coord_sf() + 
  guides(fill=guide_legend(title="In Sample")) + 
  theme(legend.position = c(0.9, 0.2))


# lpg prices -------
lpg_prices1 <- lpg_prices %>% 
  mutate(Month = lubridate::ymd(Month))

timeline_prices <- ggplot(lpg_prices1, aes(x=Month, 
                                           y=`LPG Refill Price (INR)`, 
                                           group=Location, 
                                           color=Location)) + 
  geom_line() + 
  ggsci::scale_color_aaas() + 
  ggtitle("14.2 kg LPG cylinder refill price") + 
  
  scale_y_continuous(labels = scales::unit_format(unit = "INR",big.mark = ","),
                     breaks=c(500, 550, 600, 650, 700, 
                              750, 800, 850,  900, 950, 1000)) + 
  annotate("text", x=ymd("2021-10-15"), y=1020, label="Patna, Bihar", color="#3B4992FF", size=3) + 
  annotate("text", x=ymd("2021-10-15"), y=920, label="Ranchi, Jharkhand", color="#EE0000FF", size=3) + 
  coord_cartesian(xlim=c(ymd("2020-01-01"), ymd("2021-11-01"))) +
  scale_x_date(date_breaks = "4 month") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="black"),
        axis.text.x = element_text(size=11, family="Helvetica", color="black"),
        plot.title = element_text(size=11, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 


timeline_cases <- ggplot(covid_states %>% 
                           filter(State=="Bihar" |
                                    State=="Jharkhand"), 
                         aes(x=ymd(Date), 
                             y=Confirmed, 
                             group=State,
                             color=State)) + 
  geom_line() + 
  ggsci::scale_color_aaas() + 
  ggtitle("Total COVID-19 Cases") + 
  scale_y_continuous(breaks=c(0, 100000, 200000, 
                              300000, 400000, 500000, 600000, 700000,
                              800000),
                     labels = scales::comma_format(big.mark = ",")) +
  
  geom_segment(aes(x=ymd("2020-06-15"), xend=ymd("2020-08-01"), y= 250000, yend=90000),
               arrow=arrow(type = "closed",angle = 20,length = unit(2, "mm")), color="black", size=0.4) + 
  annotate("text", x=ymd("2020-06-01"), y=300000, label="1st Wave", 
           color="black", fill="white", size=3) +
  
  geom_segment(aes(x=ymd("2021-03-15"), xend=ymd("2021-04-20"), y= 550000, yend=475000),
               arrow=arrow(type = "closed",angle = 20,length = unit(2, "mm")), color="black", size=0.4) + 
  annotate("text", x=ymd("2021-02-15"), y=600000, label="'Delta' Wave", 
           color="black", fill="white", size=3) +
  
  annotate("text", x=ymd("2021-09-01"), y=750000, label="Bihar", color="#3B4992FF", size=3) +
  annotate("text", x=ymd("2021-09-01"), y=390000, label="Jharkhand", color="#EE0000FF", size=3) +
  coord_cartesian(xlim=c(ymd("2020-01-01"), ymd("2021-11-01"))) +
  scale_x_date(date_breaks = "4 month") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="black"),
        axis.text.x = element_text(size=11, family="Helvetica", color="black"),
        plot.title = element_text(size=11, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 

timeline_fig <- ggplot() + 
  
  coord_cartesian(xlim=c(ymd("2020-01-01"), ymd("2021-11-01"))) +
  
  scale_x_date(date_breaks = "4 month") + 
  
  # Jharkhand survey rounds
  annotate("text", x=ymd("2020-02-15"), y=0.2, label="Jharkhand\nSurveys", 
           color="#BB0021FF", size=3.5) + 
  # annotate("text", x=ymd("2020-08-06"), y=.15, label="1", size=3) + 
  geom_rect(aes(xmin=ymd("2020-07-23"), xmax=ymd("2020-08-22"), 
                ymin=0.15, ymax=0.25), fill="#EE0000FF", color="white") + 
   
  geom_rect(aes(xmin=ymd("2020-09-24"), xmax=ymd("2020-10-21"), 
                ymin=0.15, ymax=0.25), fill="#EE0000FF", color="white") + 
   
  geom_rect(aes(xmin=ymd("2020-11-28"), xmax=ymd("2020-12-18"), 
                ymin=0.15, ymax=0.25), fill="#EE0000FF", color="white") + 
  geom_rect(aes(xmin=ymd("2021-03-08"), xmax=ymd("2021-04-22"), 
                ymin=0.15, ymax=0.25), fill="#EE0000FF", color="white") + 
  geom_rect(aes(xmin=ymd("2021-05-13"), xmax=ymd("2021-06-02"), 
                ymin=0.15, ymax=0.25), fill="#EE0000FF", color="white") +

  geom_rect(aes(xmin=ymd("2021-06-15"), xmax=ymd("2021-07-05"), 
                ymin=0.15, ymax=0.25), fill="#EE0000FF", color="white") +
  
  # Bihar survey rounds
  annotate("text", x=ymd("2020-02-15"), y=0.35, label="Bihar\nSurveys", 
           color="#3B4992FF", size=3.5) + 
  geom_rect(aes(xmin=ymd("2021-02-15"), xmax=ymd("2021-02-18"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") + 
  geom_rect(aes(xmin=ymd("2021-02-24"), xmax=ymd("2021-02-27"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") +  
  geom_rect(aes(xmin=ymd("2021-03-02"), xmax=ymd("2021-03-06"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") + 
  geom_rect(aes(xmin=ymd("2021-03-09"), xmax=ymd("2021-03-12"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") + 
  geom_rect(aes(xmin=ymd("2021-03-16"), xmax=ymd("2021-03-20"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") +
  geom_rect(aes(xmin=ymd("2021-03-23"), xmax=ymd("2021-03-27"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") +
  geom_rect(aes(xmin=ymd("2021-03-31"), xmax=ymd("2021-04-05"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") +
  geom_rect(aes(xmin=ymd("2021-04-06"), xmax=ymd("2021-04-09"), 
                ymin=0.30, ymax=0.40), fill="#3B4992FF", color="white") +
  
  annotate("text", x=ymd("2020-02-15"), y=0.05, label="Three free\ncylinder scheme", 
           color="#E18727FF", size=3.5) + 
  geom_rect(aes(xmin=ymd("2020-04-01"), xmax=ymd("2020-09-30"),
                ymin=0, ymax=0.1), fill="#E18727FF", color="white") +
  theme_nothing() + 
  theme(
    axis.text.x = element_text(size=11, family="Helvetica")
  )


full_timeline_fig <- 
  plot_grid(
    timeline_cases,
    timeline_prices,
    timeline_fig,
    align="hv",
    rel_heights = c(1, 1, 1),
    nrow=3,
    labels=c("b", "c", "d")
    
  )

study_map <- 
  plot_grid(
    study_map_1,
    study_map_2,
    nrow=2
    # rel_widths = c(1, 0.5)
  )


map_timeline <-
  plot_grid(
    study_map,
    full_timeline_fig,
    ncol=2,
    rel_widths = c(0.5, 1),
    labels=c("a", "")
  )

# cowplot::ggsave2("~/BurkeLab Dropbox/Carlos Gould/Jharkhand-COVID19/Code/figures/figure1.pdf",
#                  map_timeline,
#                  height = 175,
#                  width = 350,
#                  units = "mm",
#                  dpi=600)

# Figure 2 -------

# Figure 2a -------

covid_hardships_reduced_df <- jsurveys_1 %>% 
  mutate(c_1_7_econ_hardship_other_cash_sources_reduced = ifelse(round=="Round 1", 0, c_1_7_econ_hardship_other_cash_sources_reduced),
         c_1_8_econ_hardship_unable_tolook_employment = ifelse(round=="Round 1", 0, c_1_8_econ_hardship_unable_tolook_employment)) %>% 
  group_by(round) %>% 
  summarize(econ_hardship_food_prices = mean(c_1_1_econ_hardship_difficulty_food=="1" |
                                                   c_1_2_econ_hardship_increase_prices=="1", na.rm=T),
            econ_hardship_reduced_income_employment = mean(c_1_3_econ_hardship_reduced_income=="1" |
                                                             c_1_6_econ_hardship_hourly_employment_reduced=="1" |
                                                             c_1_7_econ_hardship_other_cash_sources_reduced=="1", na.rm=T),
            econ_hardship_lost_employment = mean(c_1_4_econ_hardship_salary_employment_loss=="1" |
                                                          c_1_5_econ_hardship_hourly_employment_loss=="1" |
                                                   c_1_8_econ_hardship_unable_tolook_employment=="1", na.rm=T)) %>% 
  pivot_longer(-round,"parameter", "value") %>% 
  mutate(parameter = plyr::mapvalues(parameter,
                                     from=c("econ_hardship_food_prices", 
                                            "econ_hardship_reduced_income_employment",
                                            "econ_hardship_lost_employment"),
                                     to=c("Increased prices for goods or\ndifficulty accessing food for household", 
                                          "Reduced income, hours of employment,\nor other financial help",
                                          "Loss of employment or\nability to find job"))) %>% 
  mutate(round_digit = plyr::mapvalues(round,
                                       from=c("Round 1", "Round 2", "Round 3", "Round 4",
                                              "Round 5", "Round 6"),
                                       to=c("1", "2", "3", "4", "5", "6"))) %>% 
  mutate(parameter = factor(parameter, levels=c("Increased prices for goods or\ndifficulty accessing food for household", 
                                                "Reduced income, hours of employment,\nor other financial help",
                                                "Loss of employment or\nability to find job")),
         round_digit = factor(round_digit, levels = c("1", "2", "3", "4", "5", "6"))) %>% 
  mutate(value_label = paste0(round(value*100, digits=0), "%"),
         label_position = value+0.05) %>% 
  mutate(label_position = ifelse((parameter=="Reduced income, hours of employment,\nor other financial help" |
                                           parameter=="Loss of employment or\nability to find job") &
                                          (round_digit=="3" |
                                             round_digit=="4" |
                                             round_digit=="6"), value-0.05, label_position))

covid_hardship_reduced_fig_line_1 <- ggplot(covid_hardships_reduced_df %>% 
                                              filter(parameter=="Increased prices for goods or\ndifficulty accessing food for household"), 
                                          aes(x=round_digit, y=value, 
                                              group=parameter, color=parameter)) + 
  geom_point(size=0.8) +
  geom_line(size=0.8) +
  geom_text(data=covid_hardships_reduced_df  %>% 
              filter(parameter=="Increased prices for goods or\ndifficulty accessing food for household"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=3) + 
  ggtitle("Increased prices for goods or\ndifficulty accessing food for household") + 
  scale_color_manual(values=ggsci::pal_aaas("default")(10)[1]) + 
  # scale_fill_viridis_d() + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1), clip="off") + 
  # guide_legend(reverse = TRUE) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=10, color="black"),
        plot.title = element_text(size=11, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 

covid_hardship_reduced_fig_line_2 <- ggplot(covid_hardships_reduced_df %>% 
                                              filter(parameter=="Reduced income, hours of employment,\nor other financial help"), 
                                            aes(x=round_digit, y=value, 
                                                group=parameter, color=parameter)) + 
  geom_point(size=0.8) +
  geom_line(size=0.8) +
  geom_text(data=covid_hardships_reduced_df  %>% 
              filter(parameter=="Reduced income, hours of employment,\nor other financial help"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=3) + 
  ggtitle("Reduced income, hours of employment,\nor other financial help") + 
  scale_color_manual(values=ggsci::pal_aaas("default")(10)[2]) + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1), clip="off") + 
  # guide_legend(reverse = TRUE) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        plot.title = element_text(size=11, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 

covid_hardship_reduced_fig_line_3 <- ggplot(covid_hardships_reduced_df %>% 
                                              filter(parameter=="Loss of employment or\nability to find job"), 
                                            aes(x=round_digit, y=value, group=parameter, color=parameter)) + 
  geom_point(size=0.8) +
  geom_line(size=0.8) +
  geom_text(data=covid_hardships_reduced_df  %>% 
              filter(parameter=="Loss of employment or\nability to find job"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=3) + 
  ggtitle("Loss of employment or\ninability to find job") + 
  scale_color_manual(values=ggsci::pal_aaas("default")(10)[3]) + 
  # scale_fill_viridis_d() + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1), clip="off") + 
  # guide_legend(reverse = TRUE) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        plot.title = element_text(size=11, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 



covid_hardship_reduced_fig_alt <- plot_grid(
  covid_hardship_reduced_fig_line_1,
  covid_hardship_reduced_fig_line_2,
  covid_hardship_reduced_fig_line_3,
  align="hv",
  nrow=1
)


covid_hardship_reduced_fig_alt_1 <- plot_grid(
  NULL, 
  covid_hardship_reduced_fig_alt,
  align="hv",
  nrow=2,
  rel_heights=c(0.1, 1),
  label_x = -0.25,
  labels = c("Reported COVID-19 related socio-economic changes and hardships")
)



# Figure 2b ------

covid_energy_access_reduced_df <- jsurveys_1 %>% 
  mutate(lpg_delivery_harder = ifelse((e_4_1_refill_differences_nodelivery=="1" |
                                  e_4_5_refill_differences_pickupfurther=="1") &
                                 (e_4_2_refill_differences_deliverynow=="0" &
                                    e_4_6_refill_differences_pickupcloser=="0"), 1, 0),
    lpg_delivery_easier = ifelse((e_4_1_refill_differences_nodelivery=="0" &
                                  e_4_5_refill_differences_pickupfurther=="0") &
                                 (e_4_2_refill_differences_deliverynow=="1" |
                                    e_4_6_refill_differences_pickupcloser=="1"), 1, 0),
    
    biomass_collect_less_freq_amt = ifelse((f_1_firewood_compare_collect_freq_inlockdown=="3" |
                                            f_2_firewood_compare_collect_amt_inlockdown=="3") &
                                           (f_1_firewood_compare_collect_freq_inlockdown!="1" &
                                              f_2_firewood_compare_collect_amt_inlockdown!="1"), 1, 0),
    biomass_collect_more_freq_amt = ifelse((f_1_firewood_compare_collect_freq_inlockdown!="3" &
                                            f_2_firewood_compare_collect_amt_inlockdown!="3") & 
                                           (f_1_firewood_compare_collect_freq_inlockdown=="1" |
                                              f_2_firewood_compare_collect_amt_inlockdown=="1"), 1, 0)) %>% 
  mutate(lpg_delivery_same = 1 - (lpg_delivery_harder + lpg_delivery_easier),
         biomass_collect_same_freq_amt = 1 - (biomass_collect_less_freq_amt + biomass_collect_more_freq_amt)) %>% 
  group_by(round) %>% 
  summarize(
    lpg_delivery_harder = mean(lpg_delivery_harder, na.rm=T),
    lpg_delivery_same = mean(lpg_delivery_same, na.rm=T),
    lpg_delivery_easier = mean(lpg_delivery_easier, na.rm=T),
    
    lpg_subsidy_time_less = mean(e_5_subsidy_time_compare=="3", na.rm=T),
    lpg_subsidy_time_same = mean(e_5_subsidy_time_compare=="2", na.rm=T),
    lpg_subsidy_time_more = mean(e_5_subsidy_time_compare=="1", na.rm=T),
    
    lpg_refillcost_less = mean(g_1_lpg_refill_cost_compare_inlockdown=="4" |
                                 g_1_lpg_refill_cost_compare_inlockdown=="5", na.rm=T),
    lpg_refillcost_same = mean(g_1_lpg_refill_cost_compare_inlockdown=="3", na.rm=T),
    lpg_refillcost_more = mean(g_1_lpg_refill_cost_compare_inlockdown=="1" |
                                 g_1_lpg_refill_cost_compare_inlockdown=="2", na.rm=T),

    biomass_collect_more_freq_amt = mean(biomass_collect_more_freq_amt, na.rm=T),
    biomass_collect_same_freq_amt = mean(biomass_collect_same_freq_amt, na.rm=T),
    biomass_collect_less_freq_amt = mean(biomass_collect_less_freq_amt, na.rm=T),
    
    biomass_store_less_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="3", na.rm=T),
    biomass_store_same_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="2", na.rm=T),
    biomass_store_more_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="1", na.rm=T),
    
    biomass_collect_ease_harder = mean(f_3_firewood_compare_reserve_amt_inlockdown=="3", na.rm=T),
    biomass_collect_ease_same = mean(f_3_firewood_compare_reserve_amt_inlockdown=="2", na.rm=T),
    biomass_collect_ease_easier = mean(f_3_firewood_compare_reserve_amt_inlockdown=="1", na.rm=T),
  ) %>% 
  pivot_longer(-round,"parameter", "value") %>% 
  mutate(round_digit = plyr::mapvalues(round,
                                       from=c("Round 1", "Round 2", "Round 3", "Round 4",
                                              "Round 5", "Round 6"),
                                       to=c("1", "2", "3", "4", "5", "6"))) %>% 
  mutate(variable = plyr::mapvalues(parameter, 
                                    from=c(
                                      "lpg_delivery_harder", "lpg_delivery_same", "lpg_delivery_easier",
                                      "lpg_subsidy_time_less", "lpg_subsidy_time_same", "lpg_subsidy_time_more",
                                      "lpg_refillcost_less", "lpg_refillcost_same", "lpg_refillcost_more",
                                      
                                      "biomass_collect_less_freq_amt", "biomass_collect_same_freq_amt", "biomass_collect_more_freq_amt",
                                      "biomass_collect_ease_harder", "biomass_collect_ease_same", "biomass_collect_ease_easier",
                                      "biomass_store_less_amt", "biomass_store_same_amt", "biomass_store_more_amt"),
                                    to=c(
                                      "LPG refill acquisition", "LPG refill acquisition", "LPG refill acquisition",
                                      "LPG subsidy deposit wait time", "LPG subsidy deposit wait time", "LPG subsidy deposit wait time",
                                      "LPG refill cost", "LPG refill cost", "LPG refill cost",
                                      
                                      "Biomass collection amount", "Biomass collection amount", "Biomass collection amount",
                                      "Biomass collection difficulty", "Biomass collection difficulty", "Biomass collection difficulty",
                                      "Biomass reserve amount", "Biomass reserve amount", "Biomass reserve amount")),
         change = plyr::mapvalues(parameter,
                                  from=c(
                                    "lpg_delivery_harder", "lpg_delivery_same", "lpg_delivery_easier",
                                    "lpg_subsidy_time_less", "lpg_subsidy_time_same", "lpg_subsidy_time_more",
                                    "lpg_refillcost_less", "lpg_refillcost_same", "lpg_refillcost_more",
                                    
                                    "biomass_collect_less_freq_amt", "biomass_collect_same_freq_amt", "biomass_collect_more_freq_amt",
                                    "biomass_collect_ease_harder", "biomass_collect_ease_same", "biomass_collect_ease_easier",
                                    "biomass_store_less_amt", "biomass_store_same_amt", "biomass_store_more_amt"),
                                  to=c(
                                    "More/Harder", "Same", "Less/Easier",
                                    "Less/Easier", "Same", "More/Harder",
                                    "Less/Easier", "Same", "More/Harder",
                                    "Less/Easier", "Same", "More/Harder",
                                    "More/Harder", "Same", "Less/Easier",
                                    "Less/Easier", "Same", "More/Harder"))) %>% 
  mutate(
    variable = factor(variable, levels=c(
    "LPG refill acquisition",
    "LPG subsidy deposit wait time",
    "LPG refill cost",
    "Biomass collection amount",
    "Biomass collection difficulty",
    "Biomass reserve amount"
  )),
  change = factor(change, levels= c("More/Harder", "Same",  "Less/Easier")),
  round_digit = factor(round_digit, c("1", "2", "3", "4", "5", "6"))) %>% 
  mutate(value_label = paste0(round(value*100, digits=0), "%"),
         label_position = ifelse((change=="More/Harder"), 0.97,
                                 ifelse(change=="Same", 0.50,
                                        ifelse(change=="Less/Easier", 0.03, NA))))


covid_energy_access_reduced_lpg_refill_fig <- ggplot(covid_energy_access_reduced_df %>% 
                                                       filter(variable=="LPG refill acquisition"), 
                                  aes(x=round_digit, y=value, 
                                      fill=change, color=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("LPG refill acquisition") + 
  geom_text(data=covid_energy_access_reduced_df %>% 
              filter(variable=="LPG refill acquisition"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1)) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 


covid_energy_access_reduced_lpg_subsidy_fig <- ggplot(covid_energy_access_reduced_df %>% 
                                                       filter(variable=="LPG subsidy deposit wait time"), 
                                                     aes(x=round_digit, y=value, 
                                                         fill=change, color=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("LPG subsidy deposit wait time") + 
  geom_text(data=covid_energy_access_reduced_df %>% 
              filter(variable=="LPG subsidy deposit wait time") %>% 
              mutate(label_position = ifelse(round_digit=="4" &
                                               change=="Same", 0.3, label_position)), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1)) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        # axis.text.y = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 

covid_energy_lpg_refill_cost_fig <- ggplot(covid_energy_access_reduced_df %>% 
                                                       filter(variable=="LPG refill cost"), 
                                                     aes(x=round_digit, y=value, 
                                                         fill=change, color=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("LPG refill cost") + 
  geom_text(data=covid_energy_access_reduced_df %>% 
              filter(variable=="LPG refill cost") %>% 
              mutate(label_position = ifelse(round_digit=="2" &
                                               change=="Same", 0.55,
                                             ifelse(round_digit=="3" &
                                                      change=="Same", 0.66,
                                                    ifelse(round_digit=="4" &
                                                             change=="Same", 0.05,
                                                           ifelse((round_digit=="4"|round_digit=="5"|round_digit=="6") &
                                                                    change=="Less/Easier", 0.01,
                                                                  ifelse((round_digit=="5" |
                                                                           round_digit=="6") &
                                                                           change=="Same", 0.05, label_position)))))), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1)) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        # axis.text.y = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 


covid_energy_access_reduced_biomass_collect_fig <- ggplot(covid_energy_access_reduced_df %>% 
                                                       filter(variable=="Biomass collection amount"), 
                                                     aes(x=round_digit, y=value, 
                                                         fill=change, color=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Biomass collection amount") + 
  geom_text(data=covid_energy_access_reduced_df %>% 
              filter(variable=="Biomass collection amount"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1)) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        # axis.text.y = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 


covid_energy_access_reduced_biomass_difficulty_fig <- ggplot(covid_energy_access_reduced_df %>% 
                                                            filter(variable=="Biomass collection difficulty"), 
                                                          aes(x=round_digit, y=value, 
                                                              fill=change, color=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Biomass collection difficulty") + 
  geom_text(data=covid_energy_access_reduced_df %>% 
              filter(variable=="Biomass collection difficulty"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1)) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        # axis.text.y = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 

covid_energy_access_reduced_biomass_reserve_fig <- ggplot(covid_energy_access_reduced_df %>% 
                                                               filter(variable=="Biomass reserve amount"), 
                                                             aes(x=round_digit, y=value, 
                                                                 fill=change, color=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Biomass reserve amount") + 
  geom_text(data=covid_energy_access_reduced_df %>% 
              filter(variable=="Biomass reserve amount"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  
  scale_y_continuous(labels = scales::percent_format()) +
  coord_cartesian(ylim=c(0,1)) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        # axis.text.y = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank()) 


covid_energy_reduced_fig_alt <- plot_grid(
  covid_energy_access_reduced_lpg_refill_fig,
  covid_energy_access_reduced_lpg_subsidy_fig,
  covid_energy_lpg_refill_cost_fig,
  covid_energy_access_reduced_biomass_collect_fig,
  align="hv",
  nrow=1
)


covid_energy_reduced_fig_alt_1 <- plot_grid(
  NULL, 
  covid_energy_reduced_fig_alt,
  align="hv",
  nrow=2,
  rel_heights=c(0.1, 1),
  label_x = -0.2,
  labels = c("Reported COVID-19 related changes to energy access")
)


# Figure 2c  ------------

ever_covid_energy_change_df <- jsurveys_1 %>%
  group_by(hhid) %>% 
  summarize(kerosene_use_less = mean(d_0_3_1_kerosene_use_compare=="3", na.rm=T),
            kerosene_use_same = mean(d_0_3_1_kerosene_use_compare=="2", na.rm=T),
            kerosene_use_more = mean(d_0_3_1_kerosene_use_compare=="1", na.rm=T),
            
            kerosene_hours_less = mean(d_0_3_2_kerosene_use_compare_hours=="3", na.rm=T),
            kerosene_hours_same = mean(d_0_3_2_kerosene_use_compare_hours=="2", na.rm=T),
            kerosene_hours_more = mean(d_0_3_2_kerosene_use_compare_hours=="1", na.rm=T),
            
            cook_hours_less = mean(d_8_1_cook_hours_compare_inlockdown=="3", na.rm=T),
            cook_hours_same = mean(d_8_1_cook_hours_compare_inlockdown=="2", na.rm=T),
            cook_hours_more = mean(d_8_1_cook_hours_compare_inlockdown=="1", na.rm=T),
            
            cook_recall_hours_less = mean(d_8_2_cook_hours_compare_recalllockdown=="3", na.rm=T),
            cook_recall_hours_same = mean(d_8_2_cook_hours_compare_recalllockdown=="2", na.rm=T),
            cook_recall_hours_more = mean(d_8_2_cook_hours_compare_recalllockdown=="1", na.rm=T),
            
            lpg_less = mean(d_9_lpg_use_compare=="2", na.rm=T),
            lpg_same = mean(d_9_lpg_use_compare=="3", na.rm=T),
            lpg_more = mean(d_9_lpg_use_compare=="1", na.rm=T),
            
            polluting_less = mean(d_10_pollutingfuel_use_compare=="2", na.rm=T),
            polluting_same = mean(d_10_pollutingfuel_use_compare=="3", na.rm=T),
            polluting_more = mean(d_10_pollutingfuel_use_compare=="1", na.rm=T),
            
            biomass_collect_less_freq = mean(f_1_firewood_compare_collect_freq_inlockdown=="3", na.rm=T),
            biomass_collect_same_freq = mean(f_1_firewood_compare_collect_freq_inlockdown=="2", na.rm=T),
            biomass_collect_more_freq = mean(f_1_firewood_compare_collect_freq_inlockdown=="1", na.rm=T),
            
            biomass_collect_less_amt = mean(f_2_firewood_compare_collect_amt_inlockdown=="3", na.rm=T),
            biomass_collect_same_amt = mean(f_2_firewood_compare_collect_amt_inlockdown=="2", na.rm=T),
            biomass_collect_more_amt = mean(f_2_firewood_compare_collect_amt_inlockdown=="1", na.rm=T),
            
            biomass_store_less_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="3", na.rm=T),
            biomass_store_same_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="2", na.rm=T),
            biomass_store_more_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="1", na.rm=T),
            
            biomass_collect_ease_harder = mean(f_3_firewood_compare_reserve_amt_inlockdown=="3", na.rm=T),
            biomass_collect_ease_same = mean(f_3_firewood_compare_reserve_amt_inlockdown=="2", na.rm=T),
            biomass_collect_ease_easier = mean(f_3_firewood_compare_reserve_amt_inlockdown=="1", na.rm=T)
  ) %>% 
  # ungroup() %>% 
  mutate(kerosene_use_less = ifelse(kerosene_use_less>0, 1, 0),
         kerosene_use_same = ifelse(kerosene_use_same>0, 1, 0),
         kerosene_use_more = ifelse(kerosene_use_more>0, 1, 0),
         
         kerosene_hours_less = ifelse(kerosene_hours_less>0, 1, 0),
         kerosene_hours_same = ifelse(kerosene_hours_same>0, 1, 0),
         kerosene_hours_more = ifelse(kerosene_hours_more>0, 1, 0),
         
         cook_hours_less = ifelse(cook_hours_less>0, 1, 0),
         cook_hours_same = ifelse(cook_hours_same>0, 1, 0),
         cook_hours_more = ifelse(cook_hours_more>0, 1, 0),
         
         cook_recall_hours_less = ifelse(cook_recall_hours_less>0, 1, 0),
         cook_recall_hours_same = ifelse(cook_recall_hours_same>0, 1, 0),
         cook_recall_hours_more = ifelse(cook_recall_hours_more>0, 1, 0),
         
         lpg_less = ifelse(lpg_less>0, 1, 0),
         lpg_same = ifelse(lpg_same>0, 1, 0),
         lpg_more = ifelse(lpg_more>0, 1, 0),
         
         polluting_less = ifelse(polluting_less>0, 1, 0),
         polluting_same = ifelse(polluting_same>0, 1, 0),
         polluting_more = ifelse(polluting_more>0, 1, 0),
         
         biomass_collect_less_freq = ifelse(biomass_collect_less_freq>0, 1, 0),
         biomass_collect_same_freq = ifelse(biomass_collect_same_freq>0, 1, 0),
         biomass_collect_more_freq = ifelse(biomass_collect_more_freq>0, 1, 0),
         
         biomass_collect_less_amt = ifelse(biomass_collect_less_amt>0, 1, 0),
         biomass_collect_same_amt = ifelse(biomass_collect_same_amt>0, 1, 0),
         biomass_collect_more_amt = ifelse(biomass_collect_more_amt>0, 1, 0),
         
         biomass_store_less_amt = ifelse(biomass_store_less_amt>0, 1, 0),
         biomass_store_same_amt = ifelse(biomass_store_same_amt>0, 1, 0),
         biomass_store_more_amt = ifelse(biomass_store_more_amt>0, 1, 0),
         
         biomass_collect_ease_harder = ifelse(biomass_collect_ease_harder>0, 1, 0),
         biomass_collect_ease_same = ifelse(biomass_collect_ease_same>0, 1, 0),
         biomass_collect_ease_easier = ifelse(biomass_collect_ease_easier>0, 1, 0)) %>% 
  summarize(kerosene_use_less = mean(kerosene_use_less, na.rm=T),
            kerosene_use_same = mean(kerosene_use_same, na.rm=T),
            kerosene_use_more = mean(kerosene_use_more, na.rm=T),
            
            kerosene_hours_less = mean(kerosene_hours_less, na.rm=T),
            kerosene_hours_same = mean(kerosene_hours_same, na.rm=T),
            kerosene_hours_more = mean(kerosene_hours_more, na.rm=T),
            
            cook_hours_less = mean(cook_hours_less, na.rm=T),
            cook_hours_same = mean(cook_hours_same, na.rm=T),
            cook_hours_more = mean(cook_hours_more, na.rm=T),
            
            cook_recall_hours_less = mean(cook_recall_hours_less, na.rm=T),
            cook_recall_hours_same = mean(cook_recall_hours_same, na.rm=T),
            cook_recall_hours_more = mean(cook_recall_hours_more, na.rm=T),
            
            lpg_less = ifelse(lpg_less>0, 1, 0),
            lpg_same = ifelse(lpg_same>0, 1, 0),
            lpg_more = ifelse(lpg_more>0, 1, 0),
            
            polluting_less = mean(polluting_less, na.rm=T),
            polluting_same = mean(polluting_same, na.rm=T),
            polluting_more = mean(polluting_more, na.rm=T),
            
            biomass_collect_less_freq = mean(biomass_collect_less_freq, na.rm=T),
            biomass_collect_same_freq = mean(biomass_collect_same_freq, na.rm=T),
            biomass_collect_more_freq = mean(biomass_collect_more_freq, na.rm=T),
            
            biomass_collect_less_amt = mean(biomass_collect_less_amt, na.rm=T),
            biomass_collect_same_amt = mean(biomass_collect_same_amt, na.rm=T),
            biomass_collect_more_amt = mean(biomass_collect_more_amt, na.rm=T),
            
            biomass_store_less_amt = mean(biomass_store_less_amt, na.rm=T),
            biomass_store_same_amt = mean(biomass_store_same_amt, na.rm=T),
            biomass_store_more_amt = mean(biomass_store_more_amt, na.rm=T),
            
            biomass_collect_ease_harder = mean(biomass_collect_ease_harder, na.rm=T),
            biomass_collect_ease_same = mean(biomass_collect_ease_same, na.rm=T),
            biomass_collect_ease_easier = mean(biomass_collect_ease_easier, na.rm=T)) %>% 
  mutate(round = "Ever Reported") %>% 
  pivot_longer(-round,"parameter", "value")

jsurveys %>% 
  ungroup() %>% 
  dplyr::mutate(e_8_refill_next_wks = as.numeric(e_8_refill_next_wks),
         e_2_refill_lpg_wksago = as.numeric(e_2_refill_lpg_wksago)) %>% 
  dplyr::mutate(gap = as.numeric(jsurveys_1$e_8_refill_next_wks) + as.numeric(jsurveys_1$e_2_refill_lpg_wksago)) %>% 
  dplyr::filter(!is.na(e_8_refill_next_wks) & !is.na(e_2_refill_lpg_wksago)) %>% 
  dplyr::group_by(round) %>% 
  dplyr::summarize(n=n(),
                   mean = mean(gap, na.rm=T),
            median = median(gap, na.rm=T))

covid_energy_change_df <- jsurveys_1 %>% 
  group_by(round) %>% 
  summarize(kerosene_use_less = mean(d_0_3_1_kerosene_use_compare=="3", na.rm=T),
            kerosene_use_same = mean(d_0_3_1_kerosene_use_compare=="2", na.rm=T),
            kerosene_use_more = mean(d_0_3_1_kerosene_use_compare=="1", na.rm=T),
            
            # kerosene_hours_less = mean(d_0_3_2_kerosene_use_compare_hours=="3", na.rm=T),
            # kerosene_hours_same = mean(d_0_3_2_kerosene_use_compare_hours=="2", na.rm=T),
            # kerosene_hours_more = mean(d_0_3_2_kerosene_use_compare_hours=="1", na.rm=T),
            
            cook_hours_less = mean(d_8_1_cook_hours_compare_inlockdown=="3", na.rm=T),
            cook_hours_same = mean(d_8_1_cook_hours_compare_inlockdown=="2", na.rm=T),
            cook_hours_more = mean(d_8_1_cook_hours_compare_inlockdown=="1", na.rm=T),
            
            # cook_recall_hours_less = mean(d_8_2_cook_hours_compare_recalllockdown=="3", na.rm=T),
            # cook_recall_hours_same = mean(d_8_2_cook_hours_compare_recalllockdown=="2", na.rm=T),
            # cook_recall_hours_more = mean(d_8_2_cook_hours_compare_recalllockdown=="1", na.rm=T),
            
            lpg_less = mean(d_9_lpg_use_compare=="2", na.rm=T),
            lpg_same = mean(d_9_lpg_use_compare=="3", na.rm=T),
            lpg_more = mean(d_9_lpg_use_compare=="1", na.rm=T),
            
            polluting_less = mean(d_10_pollutingfuel_use_compare=="2", na.rm=T),
            polluting_same = mean(d_10_pollutingfuel_use_compare=="3", na.rm=T),
            polluting_more = mean(d_10_pollutingfuel_use_compare=="1", na.rm=T)
            
            # biomass_collect_less_freq = mean(f_1_firewood_compare_collect_freq_inlockdown=="3", na.rm=T),
            # biomass_collect_same_freq = mean(f_1_firewood_compare_collect_freq_inlockdown=="2", na.rm=T),
            # biomass_collect_more_freq = mean(f_1_firewood_compare_collect_freq_inlockdown=="1", na.rm=T),
            # 
            # biomass_collect_less_amt = mean(f_2_firewood_compare_collect_amt_inlockdown=="3", na.rm=T),
            # biomass_collect_same_amt = mean(f_2_firewood_compare_collect_amt_inlockdown=="2", na.rm=T),
            # biomass_collect_more_amt = mean(f_2_firewood_compare_collect_amt_inlockdown=="1", na.rm=T),
            # 
            # biomass_store_less_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="3", na.rm=T),
            # biomass_store_same_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="2", na.rm=T),
            # biomass_store_more_amt = mean(f_3_firewood_compare_reserve_amt_inlockdown=="1", na.rm=T),
            # 
            # biomass_collect_ease_harder = mean(f_3_firewood_compare_reserve_amt_inlockdown=="3", na.rm=T),
            # biomass_collect_ease_same = mean(f_3_firewood_compare_reserve_amt_inlockdown=="2", na.rm=T),
            # biomass_collect_ease_easier = mean(f_3_firewood_compare_reserve_amt_inlockdown=="1", na.rm=T),
  ) %>% 
  pivot_longer(-round,"parameter", "value") %>% 
  bind_rows(ever_covid_energy_change_df) %>% 
  mutate(round_digit = plyr::mapvalues(round,
                                       from=c("Round 1", "Round 2", "Round 3", "Round 4",
                                              "Round 5", "Round 6", "Ever Reported"),
                                       to=c("1", "2", "3", "4", "5", "6", "Ever Reported"))) %>% 
  mutate(variable = plyr::mapvalues(parameter,
                                    from=c("kerosene_use_less", "kerosene_use_same", "kerosene_use_more",
                                           # "kerosene_hours_less", "kerosene_hours_same", "kerosene_hours_more",
                                           
                                           "cook_hours_less", "cook_hours_same", "cook_hours_more",
                                           # "cook_recall_hours_less", "cook_recall_hours_same", "cook_recall_hours_more",
                                           
                                           "lpg_less", "lpg_same", "lpg_more",
                                           "polluting_less", "polluting_same", "polluting_more"
                                           
                                           # "biomass_collect_less_freq", "biomass_collect_same_freq", "biomass_collect_more_freq",
                                           # "biomass_collect_less_amt", "biomass_collect_same_amt", "biomass_collect_more_amt",
                                           # "biomass_collect_ease_harder", "biomass_collect_ease_same", "biomass_collect_ease_easier",
                                           # "biomass_store_less_amt", "biomass_store_same_amt", "biomass_store_more_amt"),
                                    ),
                                    to=c("Kerosene use", "Kerosene use", "Kerosene use",
                                         # "Kerosene use: hours", "Kerosene use: hours", "Kerosene use: hours",
                                         
                                         "Cooking (hrs/day)", "Cooking (hrs/day)", "Cooking (hrs/day)",
                                         # "Cook hours (recall)", "Cook hours (recall)", "Cook hours (recall)",
                                         
                                         "LPG use", "LPG use", "LPG use", 
                                         "Polluting fuel use", "Polluting fuel use", "Polluting fuel use"
                                         
                                         # "Biomass collection frequency", "Biomass collection frequency", "Biomass collection frequency", 
                                         # "Biomass collection amount", "Biomass collection amount", "Biomass collection amount", 
                                         # "Biomass collection difficulty", "Biomass collection difficulty", "Biomass collection difficulty", 
                                         # "Biomass reserve amount", "Biomass reserve amount", "Biomass reserve amount")),
                                    )),
         change = plyr::mapvalues(parameter,
                                  from=c("kerosene_use_less", "kerosene_use_same", "kerosene_use_more",
                                         # "kerosene_hours_less", "kerosene_hours_same", "kerosene_hours_more",
                                         
                                         "cook_hours_less", "cook_hours_same", "cook_hours_more",
                                         # "cook_recall_hours_less", "cook_recall_hours_same", "cook_recall_hours_more",
                                         
                                         "lpg_less", "lpg_same", "lpg_more",
                                         "polluting_less", "polluting_same", "polluting_more"
                                         
                                         # "biomass_collect_less_freq", "biomass_collect_same_freq", "biomass_collect_more_freq",
                                         # "biomass_collect_less_amt", "biomass_collect_same_amt", "biomass_collect_more_amt",
                                         # "biomass_collect_ease_harder", "biomass_collect_ease_same", "biomass_collect_ease_easier",
                                         # "biomass_store_less_amt", "biomass_store_same_amt", "biomass_store_more_amt"),
                                  ),
                                  to=c("Less", "Same", "More",
                                       # "Less", "Same", "More",
                                       
                                       "Less", "Same", "More",
                                       # "Less", "Same", "More",
                                       
                                       "Less", "Same", "More",
                                       "Less", "Same", "More"
                                       
                                       # "Less", "Same", "More",
                                       # "Less", "Same", "More",
                                       # "More", "Same", "Less",
                                       # "Less", "Same", "More"
                                       ))) %>% 
  mutate(variable = factor(variable, levels=c("Kerosene use", 
                                              # "Kerosene use: hours", 
                                              
                                              "Cooking (hrs/day)", 
                                              # "Cook hours (recall)", 
                                              
                                              "LPG use", 
                                              "Polluting fuel use"
                                              
                                              # "Biomass collection frequency",
                                              # "Biomass collection amount", 
                                              # "Biomass collection difficulty",
                                              # "Biomass reserve amount"
                                              )),
         change = factor(change, levels= c("More", "Same", "Less")),
         round = factor(round, levels = rev(c("Round 1", "Round 2", "Round 3", "Round 4",
                                              "Round 5", "Round 6", "Ever Reported"))),
         round_digit = factor(round_digit, c("1", "2", "3", "4", "5", "6", "Ever Reported"))) %>% 
  mutate(value_label = paste0(round(value*100, digits=0), "%"),
         label_position = ifelse(change=="More", 0.97,
                                 ifelse(change=="Same", 0.50,
                                        ifelse(change=="Less", 0.03, NA)))) %>% 
  mutate(label_position = ifelse(variable=="LPG use" &
                                   round_digit=="4" &
                                   change=="Same", 0.75, 
                                 ifelse(variable=="Polluting fuel use" &
                                          round_digit=="4" &
                                          change=="Same", 0.35, label_position)))


cookfuel_stack_round <- jsurveys_1 %>% 
  dplyr::select(hhid, round, W_cookfuel_stack_category) %>% 
  group_by(round) %>% 
  summarize(exclusive_polluting = mean(W_cookfuel_stack_category=="Exclusive polluting", na.rm=T),
            primary_polluting_secondary_lpg = mean(W_cookfuel_stack_category=="Primary polluting, Secondary LPG", na.rm=T),
            primary_lpg_secondary_polluting = mean(W_cookfuel_stack_category=="Primary LPG, Secondary polluting", na.rm=T),
            exclusive_lpg = mean(W_cookfuel_stack_category=="Exclusive LPG", na.rm=T)) %>% 
  pivot_longer(-round,"parameter", "value") %>% 
  mutate(parameter = plyr::mapvalues(parameter,
                                     from=c("exclusive_polluting", 
                                            "primary_polluting_secondary_lpg",
                                            "primary_lpg_secondary_polluting",
                                            "exclusive_lpg"),
                                     to=c("Exclusive polluting", "Primary polluting, Secondary LPG", "Primary LPG, Secondary polluting", "Exclusive LPG"))) %>% 
  mutate(parameter = factor(parameter, levels = c("Exclusive LPG", "Primary LPG, Secondary polluting", "Primary polluting, Secondary LPG", "Exclusive polluting"))) %>% 
  mutate(value_label = paste0(round(value*100, digits=0), "%"),
         label_position = ifelse(parameter=="Exclusive polluting", 0.03,
                                 ifelse(parameter == "Primary polluting, Secondary LPG", 0.85,
                                        ifelse(parameter == "Primary LPG, Secondary polluting", 0.50,
                                               ifelse(parameter == "Exclusive LPG", 0.97, NA))))) %>% 
  mutate(round_digit = plyr::mapvalues(round,
                                       from=c("Round 1", "Round 2", "Round 3", "Round 4",
                                              "Round 5", "Round 6"),
                                       to=c("1", "2", "3", "4", "5", "6"))) %>% 
  mutate(round_digit = factor(round_digit, levels = c("1", "2", "3", "4", "5", "6"))) %>% 
  mutate(label_position = ifelse(parameter=="Primary polluting, Secondary LPG" &
                                   round_digit=="1", 0.71, 
                                 ifelse(parameter=="Primary polluting, Secondary LPG" &
                                          round_digit=="2", 0.63, 
                                        ifelse(parameter=="Primary polluting, Secondary LPG" &
                                                 round_digit=="3", 0.64, 
                                               ifelse(parameter=="Primary polluting, Secondary LPG" &
                                                        round_digit=="4", 0.56, 
                                                      ifelse(parameter=="Primary polluting, Secondary LPG" &
                                                               round_digit=="5", 0.56,
                                                             ifelse(parameter=="Primary polluting, Secondary LPG" &
                                                                      round_digit=="6", 0.68, label_position))))))) %>% 
  mutate(label_position = ifelse(parameter=="Primary LPG, Secondary polluting" &
                                   round_digit=="1", .45, 
                                 ifelse(parameter=="Primary LPG, Secondary polluting" &
                                          round_digit=="2", 0.40, 
                                        ifelse(parameter=="Primary LPG, Secondary polluting" &
                                                 round_digit=="3", 0.40, 
                                               ifelse(parameter=="Primary LPG, Secondary polluting" &
                                                        round_digit=="4", 0.35, 
                                                      ifelse(parameter=="Primary LPG, Secondary polluting" &
                                                               round_digit=="5", 0.40,
                                                             ifelse(parameter=="Primary LPG, Secondary polluting" &
                                                                      round_digit=="6", 0.55, label_position)))))))



covid_energy_change_kerosene_fig <- ggplot(covid_energy_change_df %>% 
                                    filter(round!="Ever Reported" &
                                             variable=="Kerosene use"), aes(x=round_digit, y=value, color=change, fill=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Overall kerosene use for lighting") + 
  geom_text(data=covid_energy_change_df  %>% 
              filter(round!="Ever Reported" &
                       variable=="Kerosene use"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) +
  
  scale_y_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())


covid_energy_change_cookinghrs_fig <- ggplot(covid_energy_change_df %>% 
                                             filter(round!="Ever Reported" &
                                                      variable=="Cooking (hrs/day)"), aes(x=round_digit, y=value, color=change, fill=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Overall cooking (hrs/day)") + 
  geom_text(data=covid_energy_change_df  %>% 
              filter(round!="Ever Reported" &
                       variable=="Cooking (hrs/day)"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) +
  
  scale_y_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())


covid_energy_change_polluting_fig <- ggplot(covid_energy_change_df %>% 
                                               filter(round!="Ever Reported" &
                                                        variable=="Polluting fuel use"), aes(x=round_digit, y=value, color=change, fill=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Overall polluting fuel use for cooking") + 
  geom_text(data=covid_energy_change_df  %>% 
              filter(round!="Ever Reported" &
                       variable=="Polluting fuel use"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) +
  
  scale_y_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())




covid_energy_change_lpg_fig <- ggplot(covid_energy_change_df %>% 
                                              filter(round!="Ever Reported" &
                                                       variable=="LPG use"), aes(x=round_digit, y=value, color=change, fill=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Overall LPG use for cooking") + 
  geom_text(data=covid_energy_change_df  %>% 
              filter(round!="Ever Reported" &
                       variable=="LPG use"), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) +
  scale_y_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        # legend.text = element_text(size=15, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())




lpg_refill_gap_round_fig <- ggplot(jsurveys_1 %>% 
                                     filter(!is.na(lpg_refill_gap)), aes(x=round_digit, y=lpg_refill_gap)) + 
  geom_violin(fill="grey50") + 
  geom_boxplot(width=0.10, outlier.shape = NA) + 
  stat_summary(fun = mean, geom="point", color="red", shape=16, size=2) +
  ggtitle("Anticipated LPG cylinder refill gap") + 
  scale_y_continuous(labels = scales::unit_format(accuracy = 1, suffix=" wks"), 
                     breaks = c(4, 8, 12, 16, 20, 24, 28, 32)) +
  coord_cartesian(ylim=c(1, 16)) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=10, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

cookfuel_stack_round_fig <- ggplot(cookfuel_stack_round %>% 
                                     filter(!is.na(parameter)), aes(x=round_digit, y=value, fill=parameter)) + 
  geom_bar(stat="identity") + 
  # ggsci::scale_color_jama() + 
  # ggsci::scale_fill_jama() + 
  scale_color_manual(values=c(met.brewer("Hokusai2",n=4,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai2",n=4,type="discrete"))) +
  geom_text(data=cookfuel_stack_round %>% 
              filter(!is.na(parameter)) %>% 
              mutate(label_position=ifelse(round_digit=="1" &
                                             parameter=="Primary polluting, Secondary LPG", 0.28,
                                           ifelse((round_digit=="2" | 
                                                     round_digit=="3") &
                                                    parameter=="Primary polluting, Secondary LPG", 0.35,
                                                  ifelse((round_digit=="4" | round_digit=="5") &
                                                           parameter=="Primary polluting, Secondary LPG", 0.45, 
                                                         ifelse(round_digit=="6" &
                                                                  parameter=="Primary polluting, Secondary LPG", 0.325, label_position))))) %>% 
              mutate(label_position=ifelse(round_digit=="1" &
                                             parameter=="Primary LPG, Secondary polluting", 0.55,
                                           ifelse((round_digit=="2" | 
                                                     round_digit=="3") &
                                                    parameter=="Primary LPG, Secondary polluting", 0.58,
                                                  ifelse(round_digit=="4" &
                                                           parameter=="Primary LPG, Secondary polluting", 0.69, 
                                                         ifelse(round_digit=="5" &
                                                                   parameter=="Primary LPG, Secondary polluting", 0.63, 
                                                         ifelse(round_digit=="6" &
                                                                  parameter=="Primary LPG, Secondary polluting", 0.42, label_position)))))), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  ggtitle("Cooking fuel stack") + 
  scale_y_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  # guide_legend(reverse = TRUE) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

covid_energy_use_fig_alt <- 
  plot_grid(
    covid_energy_change_kerosene_fig,
    covid_energy_change_polluting_fig,
    covid_energy_change_lpg_fig,
    cookfuel_stack_round_fig,
    nrow=1,
    align="hv"
  )

covid_energy_use_fig_alt_1 <- 
  plot_grid(
    NULL,
    covid_energy_use_fig_alt,
    nrow=2,
    rel_heights=c(0.1, 1),
    labels = c("Reported COVID-19 related energy use changes"),
    label_x=-0.169,
    align="hv"
  )


# Figure 2d: Regression-based approaches to seeing economic impacts of pandemic affecting energy choices --------


# Outcome: No kerosene use for lighting 
econ_kerosene_mod1 <- fixest::feglm(W_kerosene_lighting ~ 
                                      weekly_econ_hardship_any |
                                      hhid + round, jsurveys_1, family=quasibinomial())
# Outcome: Primary LPG use
econ_prim_lpg_mod1 <- fixest::feglm(W_primary_polluting ~ 
                                      # refill + 
                                      weekly_econ_hardship_any |
                                      hhid + 
                                      round, jsurveys_1 %>% 
                                      mutate(refill = as.numeric(e_6_lpgrefill_cost)/100), family=quasibinomial())


# Outcome: No polluting fuel use
econ_no_polluting_mod1 <- fixest::feglm(W_any_polluting ~ 
                                          weekly_econ_hardship_any |
                                          hhid + 
                                          round, jsurveys_1 %>% 
                                          mutate(W_any_polluting = plyr::mapvalues(W_no_polluting,
                                                                                   from=c(0,1),
                                                                                   to=c(1,0))), family=quasibinomial())

econ_regs_combined <- tidy(econ_kerosene_mod1) %>% 
  mutate(outcome = "Kerosene used\nfor lighting") %>% 
  bind_rows(tidy(econ_prim_lpg_mod1) %>% 
                   mutate(outcome = "Primary cookfuel\nis polluting")) %>% 
  bind_rows(tidy(econ_no_polluting_mod1) %>% 
                   mutate(outcome = "Any polluting\ncookfuel use")) %>% 
  mutate(OR = exp(estimate),
         OR.low = exp(estimate - (1.96*std.error)),
         OR.high = exp(estimate + (1.96*std.error))) %>% 
  mutate(outcome = factor(outcome, levels=c("Kerosene used\nfor lighting", "Any polluting\ncookfuel use", "Primary cookfuel\nis polluting")))

econ_regs_fig <- ggplot(econ_regs_combined, aes(x=outcome, y=OR, ymin=OR.low, ymax=OR.high)) + 
  geom_hline(yintercept=1) + 
  geom_pointrange() +
  annotate("text", x=1, y=2.7, label="OR: 1.8\n(95% CI: 1.4, 2.4)", size=3) + 
  annotate("text", x=3, y=3.5, label="OR: 2.4\n(95% CI: 1.8, 3.2)", size=3) + 
  annotate("text", x=2, y=4.5, label="OR: 3.0\n(95% CI: 2.2, 4.2)", size=3) + 
  scale_y_continuous(breaks=c(1, 2, 3, 4, 5)) + 
  ylab("Odds ratio\n(95% confidence interval)") + 
  coord_cartesian(ylim=c(1, 5)) + 
  ggtitle("Association of reported economic hardship with energy use") +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(size=11, color="black", face="bold"),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=10, color="black"),
        axis.text.x = element_text(size=10, color="black"),
        # plot.title = element_text(size=10, color="black", face="bold"),
        plot.title = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())


econ_regs_fig


# Figure 2: Making legends -------

covid_energy_change_lpg_fig_legend <- ggplot(covid_energy_change_df %>% 
                                        filter(round!="Ever Reported" &
                                                 variable=="LPG use") %>% 
                                          mutate(change = plyr::mapvalues(change, 
                                                                    from=c("Less", 
                                                                           "Same", 
                                                                           "More"),
                                                                    to=c("Less / Easier",
                                                                         "Same", 
                                                                         "More / Harder"))), aes(x=round_digit, y=value, color=change, fill=change)) + 
  geom_bar(stat = "identity") +
  ggtitle("Overall LPG use for cooking") + 
  geom_text(data=covid_energy_change_df  %>% 
              filter(round!="Ever Reported" &
                       variable=="LPG use") %>% 
              mutate(change = plyr::mapvalues(change, 
                                              from=c("Less", 
                                                     "Same", 
                                                     "More"),
                                              to=c("Less / Easier",
                                                   "Same", 
                                                   "More / Harder"))), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  
  scale_color_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai3",n=3,type="discrete"))) +
  
  scale_y_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "right",
        # strip.text = element_text(size=10),
        legend.title = element_blank(),
        legend.text = element_text(size=11, color="black", face = "bold"),
        legend.background = element_blank(),
        plot.background = element_blank())

covid_energy_change_lpg_fig_legend_1 <- get_legend(covid_energy_change_lpg_fig_legend)

cookfuel_stack_round_fig_legend <- ggplot(cookfuel_stack_round %>% 
                                     filter(!is.na(parameter)), aes(x=round_digit, y=value, fill=parameter)) + 
  geom_bar(stat="identity") + 
  scale_color_manual(values=c(met.brewer("Hokusai2",n=4,type="discrete"))) + 
  scale_fill_manual(values=c(met.brewer("Hokusai2",n=4,type="discrete"))) +
  geom_text(data=cookfuel_stack_round %>% 
              filter(!is.na(parameter)), 
            aes(x=round_digit, y=label_position, 
                label=value_label), size=2, color="white") + 
  ggtitle("Cooking fuel stack") + 
  scale_y_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  # guide_legend(reverse = TRUE) + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=10, color="black"),
        axis.text.y = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "right",
        legend.title = element_blank(),
        legend.text = element_text(size=11, color="black", face="bold"),
        legend.background = element_blank(),
        plot.background = element_blank())

cookfuel_stack_round_fig_legend_1 <- get_legend(cookfuel_stack_round_fig_legend)



# Figure 2: Combining figures -------

figure2_legends <- plot_grid(
  covid_energy_change_lpg_fig_legend_1,
  cookfuel_stack_round_fig_legend_1,
  rel_widths = c(.55, 1),
  nrow=1
)

figure2a <- plot_grid(
  econ_regs_fig,
  figure2_legends,
  nrow=1
)

figure2a_1 <- plot_grid(
  NULL,
  figure2a,
  nrow=2,
  rel_heights = c(.1, 1),
  label_x = -0.25,
  labels = c("Association between reported economic hardship and energy use")
)

figure2 <- plot_grid(
  NULL,
  covid_hardship_reduced_fig_alt_1,
  NULL,
  covid_energy_reduced_fig_alt_1,
  NULL,
  covid_energy_use_fig_alt_1,
  NULL,
  figure2a_1,
  align="hv",
  labels=c("a", "", "b", "", "c", "", "d"),
  label_y=0.3,
  label_x=0,
  label_size=16,
  rel_heights=c(.05, 1, 0.1, 1, .1, 1, .1, 1),
  nrow=8
)


figure2_1 <- figure2 + 
  annotate("text", x=0.036, y=0.771, label="Round", size=4) + 
  annotate("text", x=0.036, y=0.5185, label="Round", size=4) + 
  annotate("text", x=0.036, y=0.266, label="Round", size=4) 


# cowplot::ggsave2("~/BurkeLab Dropbox/Carlos Gould/Jharkhand-COVID19/Code/figures/figure2.pdf",
#                  figure2_1,
#                  height = 325,
#                  width = 270,
#                  units = "mm",
#                  dpi=300)


# 7. Fuel stacking variability and reasons ---------

fuel_combinations_j <- 
  jsurveys_1 %>% 
  dplyr::select(
    hhid, round,
    d_0_all_lighting_grid,
    d_0_all_lighting_kerosene,
    d_0_all_lighting_microgrid,
    d_0_all_lighting_solar,
    d_0_all_lighting_other,
    
    d_1_1_firewood, 
    d_1_2_any_cook_fuel_agresidue,
    d_1_3_any_cook_fuel_dung,
    d_1_4_any_cook_fuel_charcoal,
    d_1_5_any_cook_fuel_coal,
    d_1_6_any_cook_fuel_kerosene,
    d_1_7_any_cook_fuel_alcohol,
    d_1_8_any_cook_fuel_gasoline,
    d_1_9_any_cook_fuel_lpg,
    d_1_10_any_cook_fuel_biogas,
    d_1_11_any_cook_fuel_electricity,
    d_1_12_any_cook_fuel_other
  ) %>% 
  arrange(hhid, round) %>% 
  mutate(
    d_0_all_lighting_other = ifelse(!is.na(d_0_all_lighting_other), 1, NA),
    d_1_12_any_cook_fuel_other = ifelse(!is.na(d_1_12_any_cook_fuel_other), 1, NA)
  ) %>% 
  mutate(
    d_0_all_lighting_grid = replace_na(d_0_all_lighting_grid, 0),
    d_0_all_lighting_kerosene = replace_na(d_0_all_lighting_kerosene, 0),
    d_0_all_lighting_microgrid = replace_na(d_0_all_lighting_microgrid, 0),
    d_0_all_lighting_solar = replace_na(d_0_all_lighting_solar, 0),
    d_0_all_lighting_other = replace_na(d_0_all_lighting_other, 0),
    
    d_1_1_firewood = replace_na(d_1_1_firewood, 0),
    d_1_2_any_cook_fuel_agresidue = replace_na(d_1_2_any_cook_fuel_agresidue, 0),
    d_1_3_any_cook_fuel_dung = replace_na(d_1_3_any_cook_fuel_dung, 0),
    d_1_4_any_cook_fuel_charcoal = replace_na(d_1_4_any_cook_fuel_charcoal, 0),
    d_1_5_any_cook_fuel_coal = replace_na(d_1_5_any_cook_fuel_coal, 0),
    d_1_6_any_cook_fuel_kerosene = replace_na(d_1_6_any_cook_fuel_kerosene, 0),
    d_1_7_any_cook_fuel_alcohol = replace_na(d_1_7_any_cook_fuel_alcohol, 0),
    d_1_8_any_cook_fuel_gasoline = replace_na(d_1_8_any_cook_fuel_gasoline, 0),
    d_1_9_any_cook_fuel_lpg = replace_na(d_1_9_any_cook_fuel_lpg, 0),
    d_1_10_any_cook_fuel_biogas = replace_na(d_1_10_any_cook_fuel_biogas, 0),
    d_1_11_any_cook_fuel_electricity = replace_na(d_1_11_any_cook_fuel_electricity, 0),
    d_1_12_any_cook_fuel_other = replace_na(d_1_12_any_cook_fuel_other, 0),
  ) %>% 
  mutate(
    total_lighting_fuels = 
      d_0_all_lighting_grid + d_0_all_lighting_kerosene + 
      d_0_all_lighting_microgrid + d_0_all_lighting_solar +
      d_0_all_lighting_other,
    total_cook_fuels = d_1_1_firewood +  
      d_1_2_any_cook_fuel_agresidue + 
      d_1_3_any_cook_fuel_dung + 
      d_1_4_any_cook_fuel_charcoal + 
      d_1_5_any_cook_fuel_coal + 
      d_1_6_any_cook_fuel_kerosene + 
      d_1_7_any_cook_fuel_alcohol + 
      d_1_8_any_cook_fuel_gasoline + 
      d_1_9_any_cook_fuel_lpg + 
      d_1_10_any_cook_fuel_biogas + 
      d_1_11_any_cook_fuel_electricity + 
      d_1_12_any_cook_fuel_other
  ) 

total_fuels_round_j <- 
  fuel_combinations_j %>% 
  group_by(round) %>%
  summarize(
    n_total=n(),
    n_lighting = sum(!is.na(total_lighting_fuels)),
    mean_lighting = mean(total_lighting_fuels, na.rm=T),
    sd_lighting = sd(total_lighting_fuels, na.rm=T),
    
    median_lighting = median(total_lighting_fuels, na.rm=T),
    q1_lighting = quantile(total_lighting_fuels, probs=.25, na.rm=T),
    q3_lighting = quantile(total_lighting_fuels, probs=.75, na.rm=T),
    
    min_lighting = min(total_lighting_fuels, na.rm=T),
    max_lighting = max(total_lighting_fuels, na.rm=T),
    
    more_than_one_lighting = mean(total_lighting_fuels>1, na.rm=T),
    more_than_two_lighting = mean(total_lighting_fuels>2, na.rm=T),
    
    n_cook = sum(!is.na(total_cook_fuels)),
    mean_cook = mean(total_cook_fuels, na.rm=T),
    sd_cook = sd(total_cook_fuels, na.rm=T),
    
    median_cook = median(total_cook_fuels, na.rm=T),
    q1_cook = quantile(total_cook_fuels, probs=.25, na.rm=T),
    q3_cook = quantile(total_cook_fuels, probs=.75, na.rm=T),
    
    min_cook = min(total_cook_fuels, na.rm=T),
    max_cook = max(total_cook_fuels, na.rm=T),
    
    more_than_one_cook = mean(total_cook_fuels>1, na.rm=T),
    more_than_two_cook = mean(total_cook_fuels>2, na.rm=T),
    
  )



total_fuels_summary_table <- tableby(round ~ 
                                       total_lighting_fuels + 
                                       total_cook_fuels,
                                     
                                     fuel_combinations_j, 
                                     control = mycontrols)

summary(total_fuels_summary_table,pfootnote = T)
# write2word(summary(total_fuels_summary_table), "~/Desktop/total_fuels_jharkhand.doc")



# Supplementary Figure 2 -------
cookfuel_upset <- 
  upset(
    fuel_combinations_j[,8:19] %>% 
      rename(Firewood = d_1_1_firewood, 
             `Agricultural Residues` = d_1_2_any_cook_fuel_agresidue, 
             Dung = d_1_3_any_cook_fuel_dung,
             Charcoal = d_1_4_any_cook_fuel_charcoal, 
             Coal = d_1_5_any_cook_fuel_coal, 
             Kerosene = d_1_6_any_cook_fuel_kerosene, 
             Alcohol = d_1_7_any_cook_fuel_alcohol, 
             Gasoline = d_1_8_any_cook_fuel_gasoline, 
             LPG = d_1_9_any_cook_fuel_lpg, 
             Biogas = d_1_10_any_cook_fuel_biogas, 
             Electricity = d_1_11_any_cook_fuel_electricity, 
             Other = d_1_12_any_cook_fuel_other) %>% 
      dplyr::select(-Other, -Alcohol, -Gasoline) %>% 
      as.data.frame(),
    sets=c(
      "Firewood", 
      "Agricultural Residues", 
      "Dung",
      "Charcoal", 
      "Coal", 
      "Kerosene", 
      "LPG", 
      "Biogas", 
      "Electricity"
    ),
    order.by="freq",
    empty.intersections = "off"
  )

cookfuel_upset

lighting_upset <- 
  upset(
    fuel_combinations_j[,3:7] %>% 
      rename(Grid = d_0_all_lighting_grid, 
             Kerosene = d_0_all_lighting_kerosene, 
             Microgrid = d_0_all_lighting_microgrid,
             Solar = d_0_all_lighting_solar, 
             Other = d_0_all_lighting_other) %>% 
      # dplyr::select(-Other, -Alcohol, -Gasoline) %>% 
      as.data.frame(),
    sets=c(
      "Grid", 
      "Kerosene", 
      "Microgrid",
      "Solar", 
      "Other"
    ),
    order.by="freq",
    empty.intersections = "off"
  )

lighting_upset

# Stacking category last round vs. stacking category this round ----------

# Jharkhand --------

jsurveys_stacking <- jsurveys_1 %>% 
  dplyr::select(hhid, round, W_cookfuel_stack_category, B_cookfuel_stack_category) %>% 
  arrange(hhid, round) %>% 
  group_by(hhid) %>% 
  mutate(W_cookfuel_stack_category_last = lag(W_cookfuel_stack_category)) %>% 
  ungroup() 

table(jsurveys_stacking$W_cookfuel_stack_category, jsurveys_stacking$W_cookfuel_stack_category_last)

surveys_stacking_long <- jsurveys_stacking %>% 
  select(-B_cookfuel_stack_category) %>% 
  pivot_longer(-c(round, hhid)) %>% 
  group_by(hhid, round) %>%
  mutate(instance = ifelse(name=="W_cookfuel_stack_category", "Next", "Previous"))


jsurveys_stacking_alluvial_summary <- 
  jsurveys_stacking %>% 
  filter(!is.na(W_cookfuel_stack_category_last)) %>% 
  group_by(W_cookfuel_stack_category_last) %>% 
  summarize(`Exclusive polluting` = sum(W_cookfuel_stack_category=="Exclusive polluting", na.rm=T),
            `Primary polluting, Secondary LPG` = sum(W_cookfuel_stack_category=="Primary polluting, Secondary LPG", na.rm=T),
            `Primary LPG, Secondary polluting` = sum(W_cookfuel_stack_category=="Primary LPG, Secondary polluting", na.rm=T),
            `Exclusive LPG` = sum(W_cookfuel_stack_category=="Exclusive LPG", na.rm=T),
  ) %>% 
  pivot_longer(-W_cookfuel_stack_category_last) %>% 
  dplyr::rename(`Previous Round` = W_cookfuel_stack_category_last,
                `Current Round` = name) %>% 
  mutate(`Previous Round` = plyr::mapvalues(`Previous Round`, 
                                            from=c("Exclusive LPG", "Primary LPG, Secondary polluting",
                                                   "Primary polluting, Secondary LPG", "Exclusive polluting"),
                                            to=c("Exclusive\nLPG", "Primary\nLPG,\nSecondary\npolluting",
                                                 "Primary\npolluting,\nSecondary\nLPG", "Exclusive\npolluting")),
         `Current Round` = plyr::mapvalues(`Current Round`, 
                                           from=c("Exclusive LPG", "Primary LPG, Secondary polluting",
                                                  "Primary polluting, Secondary LPG", "Exclusive polluting"),
                                           to=c("Exclusive\nLPG", "Primary\nLPG,\nSecondary\npolluting",
                                                "Primary\npolluting,\nSecondary\nLPG", "Exclusive\npolluting"))) %>% 
  mutate(`Previous Round` = factor(`Previous Round`, levels=c("Exclusive\nLPG", "Primary\nLPG,\nSecondary\npolluting",
                                                              "Primary\npolluting,\nSecondary\nLPG", "Exclusive\npolluting")),
         `Current Round` = factor(`Current Round`, levels=c("Exclusive\nLPG", "Primary\nLPG,\nSecondary\npolluting",
                                                            "Primary\npolluting,\nSecondary\nLPG", "Exclusive\npolluting"))) %>% 
  mutate(percent = value/2700)

jharkhand_sankey_fig <- ggplot(jsurveys_stacking_alluvial_summary,
                               aes(axis1 = `Previous Round`, axis2 = `Current Round`, y=percent)) +
  geom_alluvium(aes(fill = c("#808fe1", "#808fe1", "#808fe1", "#808fe1",
                             "#efc86e","#efc86e","#efc86e","#efc86e",
                             "#97c684","#97c684","#97c684","#97c684",
                             "#574571","#574571","#574571","#574571")), width = 1/4) +
  geom_stratum(width = 1/4, 
               fill = rev(c("#574571", "#808fe1","#97c684","#efc86e", 
                            "#574571", "#808fe1","#97c684", "#efc86e")), 
               color = "white") +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3, color="white") +
  scale_y_continuous(labels=scales::percent_format()) + 
  scale_x_discrete(limits=c("Previous Round", "Current Round"), expand = c(-0.2, -0.2)) + 
  scale_fill_manual(values=c("#97c684","#574571", "#808fe1", "#efc86e")) + 
  ggtitle("Cooking fuel switching from round to round") + 
  ggthemes::theme_clean() + 
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=10, color="black", hjust=0),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

jharkhand_sankey_fig

# bihar -------

bihar_stacking_long <- bsurveys %>% 
  arrange(hhid, survey) %>% 
  group_by(hhid) %>% 
  mutate(stack_category_lead = lag(stack_category, 1)) %>% 
  ungroup() %>% 
  dplyr::select(hhid, survey, stack_category, stack_category_lead) %>% 
  group_by(stack_category_lead) %>% 
  summarize(`Exclusive biomass fuel` = sum(stack_category=="Exclusive biomass fuel", na.rm=T),
            `Clean and biomass fuel stacking` = sum(stack_category=="Clean and biomass fuel stacking", na.rm=T),
            `Exclusive clean fuel` = sum(stack_category=="Exclusive clean fuel", na.rm=T),
  ) %>% 
  pivot_longer(-stack_category_lead) %>% 
  dplyr::rename(`Previous Week` = stack_category_lead,
                `Current Week` = name) %>% 
  mutate(`Previous Week` = plyr::mapvalues(`Previous Week`, 
                                           from=c("Exclusive clean fuel", "Clean and biomass fuel stacking",
                                                  "Exclusive biomass fuel"),
                                           to=c("Exclusive\nLPG", "Clean,\npolluting\nstacking",
                                                "Exclusive\npolluting")),
         `Current Week` = plyr::mapvalues(`Current Week`, 
                                          from=c("Exclusive clean fuel", "Clean and biomass fuel stacking",
                                                 "Exclusive biomass fuel"),
                                          to=c("Exclusive\nLPG", "Clean,\npolluting\nstacking",
                                               "Exclusive\npolluting"))) %>% 
  mutate(`Previous Week` = factor(`Previous Week`, levels=c("Exclusive\nLPG", "Clean,\npolluting\nstacking",
                                                            "Exclusive\npolluting")),
         `Current Week` = factor(`Current Week`, levels=c("Exclusive\nLPG", "Clean,\npolluting\nstacking",
                                                          "Exclusive\npolluting"))) %>% 
  filter(!is.na(`Previous Week`)) %>% 
  mutate(percent = value/1418)


bihar_sankey_fig <- ggplot(bihar_stacking_long,
                           aes(axis1 = `Previous Week`, axis2 = `Current Week`, y=percent)) +
  geom_alluvium(aes(fill = c("#efc86e","#efc86e","#efc86e",
                             "#574571", "#574571","#574571", 
                             "#97c684", "#97c684", "#97c684" 
  )), width = 1/4) +
  geom_stratum(width = 1/4, fill = c("#efc86e","#97c684","#574571", 
                                     "#efc86e","#97c684","#574571"), color = "white") +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)),nudge_y = 0.03, size=3, color="white") +
  # geom_text(stat = "stratum", aes(label = after_stat(stratum)), size=3, color="white") +
  scale_y_continuous(labels=scales::percent_format()) + 
  scale_x_discrete(limits=c("Previous Week", "Current Week"), expand = c(-0.2, -0.2)) + 
  scale_fill_manual(values=c("#efc86e","#574571","#97c684")) + 
  ggtitle("Cooking fuel switching from week to week") + 
  ggthemes::theme_clean() + 
  coord_cartesian(ylim=c(0,1), clip="off") + 
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=10, color="black", hjust=0),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())


bihar_sankey_fig

# reasons for household energy use patterns ----------

# Jharkhand ------

jsurveys_like_lpg_stacks <- jsurveys_1 %>% 
  mutate(lpg_like_affordability = ifelse(g_3_1_2_like_using_lpg_more_cheapernow=="1" |
                                           g_3_1_4_like_using_lpg_more_threecylpolicy=="1", 1, 0),
         lpg_like_accessibility = ifelse(g_3_1_3_like_using_lpg_more_accessiblenow=="1" |
                                           g_3_1_6_like_using_lpg_more_biomassacquireharder=="1", 1, 0),
         lpg_like_cooking = ifelse(
           g_3_1_1_like_using_lpg_more_valuefastcooking=="1" |
             g_3_1_5_like_using_lpg_more_lessbiomassnow=="1" |
             g_3_1_7_like_using_lpg_more_tastepreferenceforlpg=="1" |
             g_3_1_8_like_using_lpg_more_tastepreferenceawayfirewood=="1" |
             g_3_1_9_like_using_lpg_more_prefercooklpg=="1" |
             g_3_1_10_like_using_lpg_more_awaycookfirewood=="1", 1, 0),
         
         
         lpg_likeless_affordability = ifelse(g_3_2_1_like_using_lpg_less_morecostly=="1", 1, 0),
         lpg_likeless_accessibility = ifelse(g_3_2_2_like_using_lpg_less_lessaccessible=="1", 1, 0),
         lpg_likeless_cooking = ifelse(g_3_2_4_like_using_lpg_less_tastepreferenceforfirewood=="1" |
                                         g_3_2_5_like_using_lpg_less_tastepreferenceawaylpg=="1" |
                                         g_3_2_6_like_using_lpg_less_prefercookfirewood=="1" |
                                         g_3_2_7_like_using_lpg_less_awaycooklpg=="1", 1, 0)) %>% 
  group_by(W_cookfuel_stack_category) %>% 
  summarize(lpg_like_affordability = mean(lpg_like_affordability, na.rm=T),
            lpg_like_accessibility = mean(lpg_like_accessibility, na.rm=T),
            lpg_like_cooking = mean(lpg_like_cooking, na.rm=T),
            
            lpg_likeless_affordability = mean(lpg_likeless_affordability, na.rm=T),
            lpg_likeless_accessibility = mean(lpg_likeless_accessibility, na.rm=T),
            lpg_likeless_cooking = mean(lpg_likeless_cooking, na.rm=T)) %>%
  pivot_longer(-W_cookfuel_stack_category,"name", "value") %>% 
  mutate(positive = ifelse(str_sub(name, 5, 9)=="like_", "positive", "negative")) %>% 
  mutate(name = plyr::mapvalues(name,
                                from=c("lpg_like_affordability",
                                       "lpg_like_accessibility",
                                       "lpg_like_cooking",
                                       
                                       "lpg_likeless_affordability",
                                       "lpg_likeless_accessibility",
                                       "lpg_likeless_cooking"),
                                to=c("LPG affordability",
                                     "Fuel accessibility (LPG better)",
                                     "Prefer LPG cooking",
                                     "LPG costliness                       ",
                                     "Fuel accessibility",
                                     "Prefer biomass cooking         "))) %>%
  mutate(value_label = paste0(round(value*100, digits=0), "%"),
         label_position = value + 0.07) %>% 
  mutate(name = factor(name, levels=c("LPG affordability",
                                      "Fuel accessibility (LPG better)",
                                      "Prefer LPG cooking",
                                      "LPG costliness                       ",
                                      "Fuel accessibility",
                                      "Prefer biomass cooking         ")))


# exclusive lpg -------
jsurveys_like_lpg_cat1 <- jsurveys_like_lpg_stacks %>% 
  filter(W_cookfuel_stack_category=="Exclusive LPG" &
           positive=="positive")

jharkhand_reasons_exclusive_lpg <-
  ggplot() + 
  geom_point(data=jsurveys_like_lpg_cat1, 
             aes(x=name, y=value, 
                 group=name, color=name), size=1.7) +
  geom_segment(data=jsurveys_like_lpg_cat1, 
               aes(x=name, xend=name, y=0, yend=value, group=name, color=name), size=0.7) +
  geom_text(data=jsurveys_like_lpg_cat1, 
            aes(x=name, y=value, 
                label=value_label), size=3, color="#574571", nudge_x = 0.4) + 
  ggtitle("Reasons for liking/disliking LPG or biomass more as compared to pre-pandemic") + 
  scale_color_manual(values=c("#574571", "#574571", "#574571",
                              "#574571", "#574571", "#574571"))  +
  # scale_fill_viridis_d() + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="#574571", hjust=0),
        axis.text.x = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        plot.title.position = "plot",
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

jharkhand_reasons_exclusive_lpg

# primary lpg, secondary biomass ------

jsurveys_reasons_cat2 <- jsurveys_like_lpg_stacks %>% 
  filter(W_cookfuel_stack_category=="Primary LPG, Secondary polluting") %>% 
  mutate(name = factor(name, levels=c("Prefer biomass cooking         ",
                                      "Fuel accessibility",
                                      "LPG costliness                       ",
                                      "LPG affordability",
                                      "Fuel accessibility (LPG better)",
                                      "Prefer LPG cooking")))

jharkhand_reasons_cat2_lpg <-
  ggplot() + 
  geom_point(data=jsurveys_reasons_cat2 %>% 
               filter(positive=="positive"), 
             aes(x=name, y=value, 
                 group=name, color=name),  size=1.7) +
  geom_segment(data=jsurveys_reasons_cat2  %>% 
                 filter(positive=="positive"), 
               aes(x=name, xend=name, y=0, yend=value, group=name, color=name), size=0.7) +
  geom_point(data=jsurveys_reasons_cat2 %>% 
               filter(positive=="negative"), 
             aes(x=name, y=value, 
                 group=name, color=name), shape = 4, size=2) +
  geom_segment(data=jsurveys_reasons_cat2  %>% 
                 filter(positive=="negative"), 
               aes(x=name, xend=name, y=0, yend=value, group=name, color=name), size=0.7, linetype=2) +
  geom_text(data=jsurveys_reasons_cat2, 
            aes(x=name, y=value, 
                label=value_label), size=3, color="#808fe1", nudge_x=.4) + 
  # ggtitle("Reasons for liking LPG as compared to pre-pandemic") + 
  scale_color_manual(values=c("#808fe1", "#808fe1", "#808fe1",
                              "#808fe1", "#808fe1", "#808fe1")) + 
  scale_x_discrete(limits=c("Prefer biomass cooking         ",
                            "Fuel accessibility",
                            "LPG costliness                       ",
                            "LPG affordability",
                            "Fuel accessibility (LPG better)",
                            "Prefer LPG cooking")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="#808fe1", hjust=0),
        axis.text.x = element_blank(),
        plot.title = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

jharkhand_reasons_cat2_lpg


# primary biomass, secondary lpg ------

jsurveys_reasons_cat3 <- jsurveys_like_lpg_stacks %>% 
  filter(W_cookfuel_stack_category=="Primary polluting, Secondary LPG") %>% 
  mutate(name = factor(name, levels=c("Prefer biomass cooking         ",
                                      "Fuel accessibility",
                                      "LPG costliness                       ",
                                      "LPG affordability",
                                      "Fuel accessibility (LPG better)",
                                      "Prefer LPG cooking")))

jsurveys_reasons_cat3_fig <-
  ggplot() + 
  geom_point(data=jsurveys_reasons_cat3 %>% 
               filter(positive=="positive"), 
             aes(x=name, y=value, 
                 group=name, color=name), size=1.7) +
  geom_segment(data=jsurveys_reasons_cat3  %>% 
                 filter(positive=="positive"), 
               aes(x=name, xend=name, y=0, yend=value, group=name, color=name), size=0.7) +
  geom_point(data=jsurveys_reasons_cat3  %>% 
               filter(positive=="negative"), 
             aes(x=name, y=value, 
                 group=name, color=name), shape = 4, size=2) +
  geom_segment(data=jsurveys_reasons_cat3  %>% 
                 filter(positive=="negative"), 
               aes(x=name, xend=name, y=0, yend=value, group=name, color=name), size=0.7, linetype=2) +
  geom_text(data=jsurveys_reasons_cat3, 
            aes(x=name, y=value, 
                label=value_label), size=3, nudge_x=.4, color="#97c684") + 
  scale_x_discrete(limits=c("Prefer biomass cooking         ",
                            "Fuel accessibility",
                            "LPG costliness                       ",
                            "LPG affordability",
                            "Fuel accessibility (LPG better)",
                            "Prefer LPG cooking")) + 
  # ggtitle("Reasons for liking LPG as compared to pre-pandemic") + 
  scale_color_manual(values=c("#97c684", "#97c684", "#97c684",
                              "#97c684", "#97c684", "#97c684")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="#97c684", hjust=0),
        axis.text.x = element_blank(),
        plot.title = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

jsurveys_reasons_cat3_fig

# excluisve biomass ------

jsurveys_reasons_cat4 <- jsurveys_like_lpg_stacks %>% 
  filter(W_cookfuel_stack_category=="Exclusive polluting" &
           (name=="LPG costliness                       " | 
              name=="Prefer biomass cooking" |
              name=="Fuel accessibility")) %>% 
  mutate(name = factor(name, levels=c("Prefer biomass cooking         ",
                                      "Fuel accessibility",
                                      "LPG costliness                       ")))

jsurveys_reasons_cat4_fig <-
  ggplot() + 
  geom_point(data=jsurveys_reasons_cat4, 
             aes(x=name, y=value, 
                 group=name), size=2, color="#efc86e", shape = 4) +
  geom_segment(data=jsurveys_reasons_cat4, 
               aes(x=name, xend=name, y=0, yend=value, group=name), linetype=2, color="#efc86e", size=0.7) +
  geom_text(data=jsurveys_reasons_cat4, 
            aes(x=name, y=value, 
                label=value_label), color="#efc86e", size=3, nudge_x=0.4) + 
  # ggtitle("Reasons for liking LPG as compared to pre-pandemic") + 
  scale_color_manual(values=c("#efc86e", "#efc86e", "#efc86e",
                              "#efc86e", "#efc86e", "#efc86e")) + 
  # scale_fill_viridis_d() + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="#efc86e", hjust=0),
        axis.text.x = element_text(size=9, color="black"),
        plot.title = element_text(size=9, color="#efc86e", face="bold"),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

jsurveys_reasons_cat4_fig





# round by round (not used in main fig)--------


jsurveys_like_lpg <- jsurveys_1 %>% 
  group_by(round) %>% 
  summarize(lpg_likemore = mean(g_3_1_like_using_lpg_more=="1", na.rm=T),
            lpg_likesame = mean(g_3_1_like_using_lpg_more=="2", na.rm=T),
            lpg_likeless = mean(g_3_1_like_using_lpg_more=="3", na.rm=T),
            
            lpg_like_fastcook = mean(g_3_1_1_like_using_lpg_more_valuefastcooking=="1", na.rm=T),
            lpg_like_cheaper = mean(g_3_1_2_like_using_lpg_more_cheapernow=="1", na.rm=T),
            lpg_like_access = mean(g_3_1_3_like_using_lpg_more_accessiblenow=="1", na.rm=T),
            lpg_like_threecyl = mean(g_3_1_4_like_using_lpg_more_threecylpolicy=="1", na.rm=T),
            lpg_like_likebiomassless = mean(g_3_1_5_like_using_lpg_more_lessbiomassnow=="1", na.rm=T),
            lpg_like_biomassacquireharder = mean(g_3_1_6_like_using_lpg_more_biomassacquireharder=="1", na.rm=T),
            lpg_like_tastepreflpg = mean(g_3_1_7_like_using_lpg_more_tastepreferenceforlpg=="1", na.rm=T),
            lpg_like_tasteprefawayfirewood = mean(g_3_1_8_like_using_lpg_more_tastepreferenceawayfirewood=="1", na.rm=T),
            lpg_like_prefercooklpg = mean(g_3_1_9_like_using_lpg_more_prefercooklpg=="1", na.rm=T),
            lpg_like_awaycookfirewood = mean(g_3_1_10_like_using_lpg_more_awaycookfirewood=="1", na.rm=T),
            
            lpg_likeless_morecostly = mean(g_3_2_1_like_using_lpg_less_morecostly=="1", na.rm=T),
            lpg_likeless_lessaccessible = mean(g_3_2_2_like_using_lpg_less_lessaccessible=="1", na.rm=T),
            lpg_likeless_morebiomass = mean(g_3_2_3_like_using_lpg_less_morebiomassnow=="1", na.rm=T),
            lpg_likeless_tasteprefforfirewood = mean(g_3_2_4_like_using_lpg_less_tastepreferenceforfirewood=="1", na.rm=T),
            lpg_likeless_tasteprefawaylpg = mean(g_3_2_5_like_using_lpg_less_tastepreferenceawaylpg=="1", na.rm=T),
            lpg_likeless_perfercookfirewood = mean(g_3_2_6_like_using_lpg_less_prefercookfirewood=="1", na.rm=T),
            lpg_likeless_awaycooklpg = mean(g_3_2_7_like_using_lpg_less_awaycooklpg=="1", na.rm=T)) %>%
  pivot_longer(-round,"parameter", "value") 

# Bihar  -------

# exclusive lpg -------

bsurveys_reasons_exclusivelpg <- bsurveys %>% 
  mutate(only_lpg_cooking_quality = ifelse(exclusive_lpg_reasons_prefer_lpg_easier=="1" |
                                             exclusive_lpg_reasons_prefer_lpg_cleaner=="1" |
                                             exclusive_lpg_reasons_prefer_lpg_faster_otherthings=="1" |
                                             exclusive_lpg_reasons_no_biomass_itisdirty=="1" |
                                             exclusive_lpg_reasons_no_biomass_hurthealth=="1", 1, 0),
         only_lpg_availability = ifelse(exclusive_lpg_reasons_no_biomass_hardacquire=="1" |
                                          exclusive_lpg_reasons_nobiomass=="1", 1, 0),
         only_lpg_cost = ifelse(exclusive_lpg_reasons_enough_lpg_can_afford=="1", 1, 0)) %>% 
  summarize(only_lpg_cooking_quality = mean(only_lpg_cooking_quality, na.rm=T),
            only_lpg_availability = mean(only_lpg_availability, na.rm=T),
            only_lpg_cost = mean(only_lpg_cost, na.rm=T)) %>%
  pivot_longer(everything()) %>% 
  mutate(name = plyr::mapvalues(name,
                                from=c("only_lpg_cooking_quality",
                                       "only_lpg_availability",
                                       "only_lpg_cost"),
                                to=c("Prefer LPG cooking",
                                     "Fuel availability             ",
                                     "LPG affordability"))) %>%
  mutate(value_label = paste0(round(value*100, digits=0), "%"),
         label_position = value + 0.07) %>% 
  mutate(name = forcats::fct_reorder(name, value, rev))


b_reasons_exclusivelpg <- ggplot() + 
  geom_point(data=bsurveys_reasons_exclusivelpg, 
             aes(x=name, y=value, 
                 group=name), size=1.7, color="#574571") +
  geom_segment(data=bsurveys_reasons_exclusivelpg, 
               aes(x=name, xend=name, y=0, yend=value, group=name), color="#574571", size=0.7) +
  geom_text(data=bsurveys_reasons_exclusivelpg, 
            aes(x=name, y=value, 
                label=value_label), color="#574571", size=3, nudge_x=0.4) + 
  ggtitle("Reasons for using cooking fuels as reported") + 
  # scale_fill_viridis_d() + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="#574571", hjust=0),
        axis.text.x = element_blank(),
        plot.title = element_text(size=10, color="black", face="bold"),
        plot.title.position = "plot",
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

b_reasons_exclusivelpg


# stacking -------
bsurveys_reasons_stacking_1 <- bsurveys %>% 
  mutate(stack_lpg_cooking_quality = ifelse(stacker_reasons_lpg_prefer_lpg_speed=="1" |
                                              stacker_reasons_lpg_prefer_lpg_clean=="1" |
                                              stacker_reasons_lpg_prefer_lpg_notsmoky=="1" |
                                              stacker_reasons_lpg_prefer_lpg_easy=="1", 1, 0),
         stack_lpg_availability = ifelse(stacker_reasons_lpg_had_enough_lpg=="1", 1, 0),
         
         stack_biomass_cooking_quality = ifelse(stacker_reasons_biomass_prefer_biomass_lpg_notpowerful=="1" |
                                                  stacker_reasons_biomass_prefer_biomass_cookoutside=="1" |
                                                  stacker_reasons_biomass_biomasspreferred=="1" |
                                                  stacker_reasons_biomass_prefer_biomass_tradition=="1", 1, 0),
         stack_biomass_availability = ifelse(stacker_reasons_biomass_had_enough_biomass=="1", 1, 0),
         stack_biomass_affordability = ifelse(stacker_reasons_biomass_prefer_biomass_tasks_lpg_expensive=="1" |
                                                stacker_reasons_biomass_rationlpg_expensive=="1", 1, 0)) %>% 
  summarize(stack_lpg_cooking_quality = mean(stack_lpg_cooking_quality, na.rm=T),
            stack_lpg_availability = mean(stack_lpg_availability, na.rm=T),
            stack_biomass_cooking_quality = mean(stack_biomass_cooking_quality, na.rm=T),
            stack_biomass_availability = mean(stack_biomass_availability, na.rm=T),
            stack_biomass_affordability = mean(stack_biomass_affordability, na.rm=T)) %>%
  pivot_longer(everything()) %>% 
  mutate(name = plyr::mapvalues(name,
                                from=c("stack_lpg_cooking_quality",
                                       "stack_lpg_availability",
                                       "stack_biomass_cooking_quality",
                                       "stack_biomass_availability",
                                       "stack_biomass_affordability"),
                                to=c("Prefer LPG cooking",
                                     "LPG available",
                                     "Prefer biomass cooking",
                                     "Biomass available",
                                     "LPG costliness"))) %>%
  mutate(positive = c("positive", "positive", "negative", "negative", "negative")) %>% 
  mutate(value_label = paste0(round(value*100, digits=0), "%")) %>% 
  mutate(name = factor(name, levels=c("Biomass available", 
                                      "Prefer biomass cooking",
                                      "LPG costliness",
                                      "LPG available",
                                      "Prefer LPG cooking")))


b_reasons_stacking <- ggplot() + 
  geom_point(data=bsurveys_reasons_stacking_1 %>% filter(positive=="positive"),
             aes(x=name, y=value,
                 group=name), size=1.7, color="#97c684") +
  geom_segment(data=bsurveys_reasons_stacking_1 %>% filter(positive=="positive"),
               aes(x=name, xend=name, y=0, yend=value, group=name), size=0.7, color="#97c684") +
  geom_point(data=bsurveys_reasons_stacking_1 %>% filter(positive=="negative"),
             aes(x=name, y=value,
                 group=name), size=2, color="#97c684", shape = 4) +
  geom_segment(data=bsurveys_reasons_stacking_1 %>% filter(positive=="negative"),
               aes(x=name, xend=name, y=0, yend=value, group=name), size=0.7, linetype=2, color="#97c684") +
  geom_text(data=bsurveys_reasons_stacking_1, 
            aes(x=name, y=value, 
                label=value_label), size=3, nudge_x=0.4, color="#97c684") + 
  ggsci::scale_color_aaas() + 
  scale_y_continuous(labels = scales::percent_format()) +
  scale_x_discrete(limits=c("Biomass available", 
                            "Prefer biomass cooking",
                            "LPG costliness",
                            "LPG available",
                            "Prefer LPG cooking")) + 
  coord_flip(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="#97c684", hjust=0),
        axis.text.x = element_blank(),
        plot.title = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

b_reasons_stacking

# Full, not used
bsurveys_reasons_stacking <- bsurveys %>% 
  summarize(stack_lpg_had_enough_lpg = mean(stacker_reasons_lpg_had_enough_lpg=="1", na.rm=T),
            stack_lpg_speed = mean(stacker_reasons_lpg_prefer_lpg_speed=="1", na.rm=T),
            stack_lpg_clean = mean(stacker_reasons_lpg_prefer_lpg_clean=="1", na.rm=T),
            stack_lpg_notsmoky = mean(stacker_reasons_lpg_prefer_lpg_notsmoky=="1", na.rm=T),
            stack_lpg_easy = mean(stacker_reasons_lpg_prefer_lpg_easy=="1", na.rm=T),
            
            stack_lpg_main_had_enough_lpg = mean(stacker_biggest_reason_lpg_had_enough_lpg=="1", na.rm=T),
            stack_lpg_main_speed = mean(stacker_biggest_reason_lpg_prefer_lpg_speed=="1", na.rm=T),
            stack_lpg_main_clean = mean(stacker_biggest_reason_lpg_prefer_lpg_clean=="1", na.rm=T),
            stack_lpg_main_notsmoky = mean(stacker_biggest_reason_lpg_prefer_lpg_notsmoky=="1", na.rm=T),
            stack_lpg_main_easy = mean(stacker_biggest_reason_lpg_prefer_lpg_easy=="1", na.rm=T),
            
            stack_biomass_had_enough_biomass = mean(stacker_reasons_biomass_had_enough_biomass=="1", na.rm=T),
            stack_biomass_lpg_expensive = mean(stacker_reasons_biomass_prefer_biomass_tasks_lpg_expensive=="1", na.rm=T),
            stack_biomass_lpg_notpowerful = mean(stacker_reasons_biomass_prefer_biomass_lpg_notpowerful=="1", na.rm=T),
            stack_biomass_cookoutside = mean(stacker_reasons_biomass_prefer_biomass_cookoutside=="1", na.rm=T),
            stack_biomass_biomasspreferred = mean(stacker_reasons_biomass_biomasspreferred=="1", na.rm=T),
            stack_biomass_biomasstradition = mean(stacker_reasons_biomass_prefer_biomass_tradition=="1", na.rm=T),
            stack_biomass_lpg_expensive_ration = mean(stacker_reasons_biomass_rationlpg_expensive=="1", na.rm=T),
            
            stack_biomass_main_had_enough_biomass = mean(stacker_biggest_reason_biomass_had_enough_biomass=="1", na.rm=T),
            stack_biomass_main_lpg_expensive = mean(stacker_biggest_reason_biomass_prefer_biomass_tasks_lpg_expensive=="1", na.rm=T),
            stack_biomass_main_lpg_notpowerful = mean(stacker_biggest_reason_biomass_prefer_biomass_lpg_notpowerful=="1", na.rm=T),
            stack_biomass_main_cookoutside = mean(stacker_biggest_reason_biomass_prefer_biomass_cookoutside=="1", na.rm=T),
            stack_biomass_main_biomasspreferred  = mean(stacker_biggest_reason_biomass_biomasspreferred=="1", na.rm=T),
            stack_biomass_main_biomasstradition = mean(stacker_biggest_reason_biomass_prefer_biomass_tradition=="1", na.rm=T),
            stack_biomass_main_lpg_expensive_ration = mean(stacker_biggest_reason_biomass_rationlpg_expensive=="1", na.rm=T)) %>%
  pivot_longer(everything()) 



# exclusive polluting -------

bsurveys_reasons_exclusivebiomass <- bsurveys %>% 
  mutate(only_biomass_cooking_quality = ifelse(exclusive_biomass_reasons_prefer_biomass_bettereasierfaster=="1" |
                                                 exclusive_biomass_reasons_prefer_biomass_taste=="1", 1, 0),
         only_biomass_availability = ifelse(exclusive_biomass_reasons_nolpg=="1" |
                                              exclusive_biomass_no_lpg_nolpg=="1", 1, 0),
         only_biomass_cost = ifelse(exclusive_biomass_reasons_prefer_biomass_cheaper_betterfinances=="1" |
                                      exclusive_biomass_reasons_no_lpg_tooexpensive=="1" |
                                      exclusive_biomass_no_lpg_ration_lpg_cost=="1" |
                                      exclusive_biomass_no_lpg_told_tooexpensive=="1", 1, 0)) %>% 
  summarize(only_biomass_cooking_quality = mean(only_biomass_cooking_quality, na.rm=T),
            only_biomass_availability = mean(only_biomass_availability, na.rm=T),
            only_biomass_cost = mean(only_biomass_cost, na.rm=T)) %>%
  pivot_longer(everything()) %>% 
  mutate(name = plyr::mapvalues(name,
                                from=c("only_biomass_cooking_quality",
                                       "only_biomass_availability",
                                       "only_biomass_cost"),
                                to=c("Prefer biomass cooking",
                                     "No LPG available",
                                     "Fuel affordability"))) %>%
  mutate(value_label = paste0(round(value*100, digits=0), "%"),
         label_position = value + 0.07) %>% 
  mutate(name = forcats::fct_reorder(name, value, rev))

b_reasons_exclusivebiomass <- ggplot() + 
  geom_point(data=bsurveys_reasons_exclusivebiomass, 
             aes(x=name, y=value, 
                 group=name), shape=4, size=2, color="#efc86e") +
  geom_segment(data=bsurveys_reasons_exclusivebiomass, 
               aes(x=name, xend=name, y=0, yend=value, group=name), size=0.7, color="#efc86e", linetype=2) +
  geom_text(data=bsurveys_reasons_exclusivebiomass, 
            aes(x=name, y=value, 
                label=value_label), size=3, nudge_x=0.4, color="#efc86e") + 
  ggsci::scale_color_aaas() + 
  scale_y_continuous(labels = scales::percent_format()) +
  coord_flip(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(size=9, color="#efc86e", hjust=0),
        axis.text.x = element_text(size=9, color="black"),
        plot.title = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

b_reasons_exclusivebiomass

# overall fuel stacks --------

j_cookfuel_stack <- jsurveys_1 %>% 
  summarize(exclusive_polluting = mean(W_cookfuel_stack_category=="Exclusive polluting", na.rm=T),
            primary_polluting_secondary_lpg = mean(W_cookfuel_stack_category=="Primary polluting, Secondary LPG", na.rm=T),
            primary_lpg_secondary_polluting = mean(W_cookfuel_stack_category=="Primary LPG, Secondary polluting", na.rm=T),
            exclusive_lpg = mean(W_cookfuel_stack_category=="Exclusive LPG", na.rm=T)) %>% 
  pivot_longer(everything()) %>% 
  mutate(name = plyr::mapvalues(name,
                                from=c("exclusive_polluting", 
                                       "primary_polluting_secondary_lpg",
                                       "primary_lpg_secondary_polluting",
                                       "exclusive_lpg"),
                                to=c("Exclusive polluting", "Primary polluting,\nSecondary LPG", "Primary LPG,\nSecondary polluting", "Exclusive LPG"))) %>% 
  mutate(name = factor(name, levels = c("Exclusive LPG", "Primary LPG,\nSecondary polluting", "Primary polluting,\nSecondary LPG", "Exclusive polluting"))) %>% 
  mutate(label_y = ifelse(name=="Exclusive polluting", 0.14, 
                          ifelse(name=="Primary polluting,\nSecondary LPG", 0.37,
                                 ifelse(name=="Primary LPG,\nSecondary polluting", 0.56, 0.85))))

j_fuel_stacks <- ggplot(j_cookfuel_stack, aes(x="Overall", y=value, fill=name)) + 
  geom_bar(stat="identity", color="white") + 
  geom_text(data=j_cookfuel_stack, 
            aes(x="Overall", y=label_y, 
                label=name), size=3, color="white") + 
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values=c("#574571","#808fe1","#97c684",  "#efc86e")) +
  ggtitle("Overall fuel stacking")  +
  coord_cartesian(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=9, color="black"),
        axis.text.y = element_text(size=9, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

j_fuel_stacks


b_cookfuel_stack <- bsurveys %>% 
  summarize(exclusive_polluting = mean(stack_category_exclusive_biomass==1, na.rm=T),
            stacking = mean(stack_category_stacker==1, na.rm=T),
            exclusive_lpg = mean(stack_category_exclusive_lpg==1, na.rm=T)) %>% 
  pivot_longer(everything()) %>% 
  mutate(name = plyr::mapvalues(name,
                                from=c("exclusive_polluting", 
                                       "stacking",
                                       "exclusive_lpg"),
                                to=c("Exclusive polluting", "Clean, polluting\nstacking", "Exclusive LPG"))) %>% 
  mutate(name = factor(name, levels = c("Exclusive LPG", "Clean, polluting\nstacking", "Exclusive polluting"))) %>% 
  mutate(label_y = ifelse(name=="Exclusive polluting", 0.03, 
                          ifelse(name=="Clean, polluting\nstacking", 0.37, 0.85)))

b_fuel_stacks <- ggplot(b_cookfuel_stack, aes(x="Overall", y=value, fill=name)) + 
  geom_bar(stat="identity", color="white") + 
  geom_text(data=b_cookfuel_stack, 
            aes(x="Overall", y=label_y, 
                label=name), size=3, color="white") + 
  scale_y_continuous(labels = scales::percent_format()) +
  ggtitle("Overall fuel stacking") + 
  scale_fill_manual(values=c("#574571","#97c684", "#efc86e")) + 
  coord_cartesian(ylim=c(0,1), clip="off") + 
  theme(axis.title = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size=9, color="black"),
        axis.text.y = element_text(size=9, color="black"),
        plot.title = element_text(size=10, color="black", face="bold"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype="dotted", color="grey80", size=0.5),
        panel.background = element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=12, color="black"),
        legend.background = element_blank(),
        plot.background = element_blank())

b_fuel_stacks

# combine figures --------

# jharkhand ------


j_reasons_combined <- 
  plot_grid(
    jharkhand_reasons_exclusive_lpg,
    jharkhand_reasons_cat2_lpg,
    jsurveys_reasons_cat3_fig,
    jsurveys_reasons_cat4_fig,
    nrow=4,
    rel_heights=c(0.7, 1, 1, 0.7),
    align="v"
  )

j_stack_sankey <- 
  plot_grid(
    j_fuel_stacks,
    jharkhand_sankey_fig,
    nrow=1,
    rel_widths = c(0.5, 1),
    align = "hv",
    labels=c("a", "b")
  )

j_stack_sankey_reasons <- 
  plot_grid(
    j_stack_sankey,
    NULL,
    j_reasons_combined,
    nrow=1,
    rel_widths = c(0.9, 0.05, 1),
    align="hv",
    labels=c("","",  "c"),
    label_x = -0.05
  )


# bihar -----
b_reasons_combined <- 
  plot_grid(
    b_reasons_exclusivelpg,
    b_reasons_stacking,
    b_reasons_exclusivebiomass,
    nrow=3,
    rel_heights=c(0.7, 1, 0.7),
    align="v"
  )

b_stack_sankey <- 
  plot_grid(
    b_fuel_stacks, 
    bihar_sankey_fig,
    nrow=1,
    rel_widths = c(0.5, 1),
    labels=c("d", "e")
  )


b_stack_sankey_reasons <- 
  plot_grid(
    b_stack_sankey, 
    NULL,
    b_reasons_combined,
    nrow=1,
    rel_widths = c(0.9, 0.05, 1),
    labels=c("","",  "f"),
    label_x = -0.05
  )


# combine both --------

jb_stack_sankey_reasons <- plot_grid(
  NULL, j_stack_sankey_reasons,
  NULL, b_stack_sankey_reasons,
  nrow=4,
  rel_heights=c(0.06, 1, 0.06, 1),
  labels=c("Jharkhand", "", "Bihar", "")
)


# cowplot::ggsave2("~/BurkeLab Dropbox/Carlos Gould/Jharkhand-COVID19/Nature Energy/figure3.pdf",
#                  jb_stack_sankey_reasons,
#                  height = 270,
#                  width = 330,
#                  units = "mm",
#                  dpi=300)



